{ "best_metric": null, "best_model_checkpoint": null, "epoch": 38.0, "eval_steps": 500, "global_step": 3078, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.46243291592128805, "precision": 0.6588785046728972, "recall": 0.3562241616903996, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8342621259029929, "precision": 0.8096955128205128, "recall": 0.8603661132396765, "support": 2349.0 }, "eval_O": { "f1-score": 0.9993622448979592, "precision": 0.9987253027405991, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.8941052999825083, "precision": 0.8401157057392676, "recall": 0.955510692388216, "support": 13374.0 }, "eval_accuracy": 0.8857510808573268, "eval_loss": 0.2754688858985901, "eval_macro avg": { "f1-score": 0.7975406466761871, "precision": 0.8268537564933192, "recall": 0.793025241829573, "support": 32613.0 }, "eval_runtime": 4.8781, "eval_samples_per_second": 16.605, "eval_steps_per_second": 2.255, "eval_weighted avg": { "f1-score": 0.8726239240080429, "precision": 0.8746959682542021, "recall": 0.8857510808573268, "support": 32613.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.6446312571646924, "precision": 0.5518482172064115, "recall": 0.77491961414791, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8912341158733577, "precision": 0.9019180470793374, "recall": 0.8808003405704555, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998005504806733, "precision": 0.9999202106439001, "recall": 0.9996809189534142, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.8690409646250897, "precision": 0.9324023303632625, "recall": 0.8137430835950351, "support": 13374.0 }, "eval_accuracy": 0.8848618649004998, "eval_loss": 0.25178349018096924, "eval_macro avg": { "f1-score": 0.8511767220359533, "precision": 0.8465222013232279, "recall": 0.8672859893167036, "support": 32613.0 }, "eval_runtime": 4.907, "eval_samples_per_second": 16.507, "eval_steps_per_second": 2.242, "eval_weighted avg": { "f1-score": 0.8909418634778542, "precision": 0.9053537287957648, "recall": 0.8848618649004998, "support": 32613.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.6598764019226366, "precision": 0.6576186131386861, "recall": 0.6621497473587505, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9062119366626066, "precision": 0.8661233993015134, "recall": 0.9501915708812261, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998404722022812, "precision": 0.9997607273887382, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9007852612503775, "precision": 0.9097148086014946, "recall": 0.8920293106026619, "support": 13374.0 }, "eval_accuracy": 0.9070002759635728, "eval_loss": 0.23778581619262695, "eval_macro avg": { "f1-score": 0.8666785180094754, "precision": 0.8583043871076081, "recall": 0.876072714645248, "support": 32613.0 }, "eval_runtime": 4.9168, "eval_samples_per_second": 16.474, "eval_steps_per_second": 2.237, "eval_weighted avg": { "f1-score": 0.907089072967282, "precision": 0.9075314026721463, "recall": 0.9070002759635728, "support": 32613.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6604639922667954, "precision": 0.696838347781744, "recall": 0.6276986678915939, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9140214420853479, "precision": 0.9028239202657807, "recall": 0.9255002128565347, "support": 2349.0 }, "eval_O": { "f1-score": 0.9994417862838917, "precision": 0.9991230867346939, "recall": 0.9997606892150607, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9084203149780549, "precision": 0.8963534463934785, "recall": 0.9208165096455809, "support": 13374.0 }, "eval_accuracy": 0.912366234323736, "eval_loss": 0.2598011791706085, "eval_macro avg": { "f1-score": 0.8705868839035225, "precision": 0.8737847002939243, "recall": 0.8684440199021926, "support": 32613.0 }, "eval_runtime": 4.8872, "eval_samples_per_second": 16.574, "eval_steps_per_second": 2.251, "eval_weighted avg": { "f1-score": 0.9107077581074873, "precision": 0.9096864919608297, "recall": 0.912366234323736, "support": 32613.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6731001206272618, "precision": 0.6440713536201469, "recall": 0.7048690858980248, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8871466886734063, "precision": 0.8606885508406725, "recall": 0.9152830991911451, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998005345673595, "precision": 1.0, "recall": 0.9996011486917677, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9039056236399038, "precision": 0.9234729698104376, "recall": 0.8851502916105877, "support": 13374.0 }, "eval_accuracy": 0.9072455769171802, "eval_loss": 0.3166182339191437, "eval_macro avg": { "f1-score": 0.8659882418769828, "precision": 0.8570582185678143, "recall": 0.8762259063478812, "support": 32613.0 }, "eval_runtime": 4.9018, "eval_samples_per_second": 16.525, "eval_steps_per_second": 2.244, "eval_weighted avg": { "f1-score": 0.9087455557232214, "precision": 0.9110652677714914, "recall": 0.9072455769171802, "support": 32613.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.634920634920635, "precision": 0.7171775592828225, "recall": 0.5695911805236564, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9061357171881348, "precision": 0.8666925767586475, "recall": 0.9493401447424435, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998404722022812, "precision": 0.9997607273887382, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9096214165876431, "precision": 0.8879236684705212, "recall": 0.9324061612083147, "support": 13374.0 }, "eval_accuracy": 0.9111397295556987, "eval_loss": 0.32181796431541443, "eval_macro avg": { "f1-score": 0.8626295602246735, "precision": 0.8678886329751823, "recall": 0.8628144290531921, "support": 32613.0 }, "eval_runtime": 4.8856, "eval_samples_per_second": 16.579, "eval_steps_per_second": 2.252, "eval_weighted avg": { "f1-score": 0.907375409471386, "precision": 0.9065876667768511, "recall": 0.9111397295556987, "support": 32613.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 7.612313747406006, "learning_rate": 1.7530864197530865e-05, "loss": 0.2088, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6538414409998775, "precision": 0.7008142894667717, "recall": 0.6127698667891593, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8866711022399646, "precision": 0.9254629629629629, "recall": 0.8510004257130694, "support": 2349.0 }, "eval_O": { "f1-score": 0.999880349379811, "precision": 0.9998404722022812, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9127824473310774, "precision": 0.8890070167977886, "recall": 0.9378645132346344, "support": 13374.0 }, "eval_accuracy": 0.9120596081317266, "eval_loss": 0.4053190350532532, "eval_macro avg": { "f1-score": 0.8632938349876826, "precision": 0.8787811853574512, "recall": 0.8503887588688042, "support": 32613.0 }, "eval_runtime": 4.9061, "eval_samples_per_second": 16.51, "eval_steps_per_second": 2.242, "eval_weighted avg": { "f1-score": 0.909811074225195, "precision": 0.9091110268457285, "recall": 0.9120596081317266, "support": 32613.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.668355545764952, "precision": 0.6421164021164021, "recall": 0.6968305006890216, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8960263537162857, "precision": 0.8676236044657097, "recall": 0.9263516389953171, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9012129071630178, "precision": 0.9198069137340392, "recall": 0.8833557649170032, "support": 13374.0 }, "eval_accuracy": 0.9063870235795541, "eval_loss": 0.4514281749725342, "eval_macro avg": { "f1-score": 0.8663987016610639, "precision": 0.8573867300790377, "recall": 0.8766344761503355, "support": 32613.0 }, "eval_runtime": 4.8816, "eval_samples_per_second": 16.593, "eval_steps_per_second": 2.253, "eval_weighted avg": { "f1-score": 0.9077241398073883, "precision": 0.9098003963445191, "recall": 0.9063870235795541, "support": 32613.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6913841807909605, "precision": 0.7090777402221149, "recall": 0.674552135966927, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9055304497525284, "precision": 0.9155787641427328, "recall": 0.8957002979991485, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998404340194671, "precision": 1.0, "recall": 0.9996809189534142, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9162317231167869, "precision": 0.9072648632798181, "recall": 0.9253775983251085, "support": 13374.0 }, "eval_accuracy": 0.9183147824487168, "eval_loss": 0.4471658766269684, "eval_macro avg": { "f1-score": 0.8782466969199356, "precision": 0.8829803419111664, "recall": 0.8738277378111496, "support": 32613.0 }, "eval_runtime": 4.8813, "eval_samples_per_second": 16.594, "eval_steps_per_second": 2.253, "eval_weighted avg": { "f1-score": 0.9175807345372852, "precision": 0.9170508471898524, "recall": 0.9183147824487168, "support": 32613.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6829727187206021, "precision": 0.6997590361445784, "recall": 0.6669728984841525, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.908567906307787, "precision": 0.8780778395552026, "recall": 0.9412515964240102, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999202233745512, "precision": 1.0, "recall": 0.9998404594767071, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9131976852716073, "precision": 0.9119379613749907, "recall": 0.914460894272469, "support": 13374.0 }, "eval_accuracy": 0.9161683991046515, "eval_loss": 0.45130759477615356, "eval_macro avg": { "f1-score": 0.8761646334186369, "precision": 0.8724437092686929, "recall": 0.8806314621643347, "support": 32613.0 }, "eval_runtime": 4.8986, "eval_samples_per_second": 16.535, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9154630053129839, "precision": 0.9150220459300829, "recall": 0.9161683991046515, "support": 32613.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6209771986970684, "precision": 0.717554953327311, "recall": 0.5473128158015618, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8710538016896399, "precision": 0.9115867845509539, "recall": 0.8339719029374202, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9144776812837283, "precision": 0.8758814267132197, "recall": 0.9566322715717063, "support": 13374.0 }, "eval_accuracy": 0.9098212369300586, "eval_loss": 0.5217961668968201, "eval_macro avg": { "f1-score": 0.8516271704176092, "precision": 0.8762557911478711, "recall": 0.8344792475776721, "support": 32613.0 }, "eval_runtime": 4.8915, "eval_samples_per_second": 16.559, "eval_steps_per_second": 2.249, "eval_weighted avg": { "f1-score": 0.9050398495319223, "precision": 0.9050252912813879, "recall": 0.9098212369300586, "support": 32613.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.6767220902612826, "precision": 0.7006886374815543, "recall": 0.6543408360128617, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9093525179856115, "precision": 0.8791732909379968, "recall": 0.9416773094934014, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999202233745512, "precision": 1.0, "recall": 0.9998404594767071, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9131033456142309, "precision": 0.9089427280136326, "recall": 0.9173022282039779, "support": 13374.0 }, "eval_accuracy": 0.9156777971974366, "eval_loss": 0.48604533076286316, "eval_macro avg": { "f1-score": 0.8747745443089191, "precision": 0.872201164108296, "recall": 0.878290208296737, "support": 32613.0 }, "eval_runtime": 4.8984, "eval_samples_per_second": 16.536, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9146463407295841, "precision": 0.9139967630227935, "recall": 0.9156777971974366, "support": 32613.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 1.4397014379501343, "learning_rate": 1.506172839506173e-05, "loss": 0.0253, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { "f1-score": 0.6805793134907059, "precision": 0.6758776896942242, "recall": 0.6853468075333027, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.903767491926803, "precision": 0.914198606271777, "recall": 0.8935717326521925, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998005982053838, "precision": 0.9996809952946806, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9103489546321576, "precision": 0.9107236399012197, "recall": 0.9099745775385075, "support": 13374.0 }, "eval_accuracy": 0.9133781007573667, "eval_loss": 0.5282083749771118, "eval_macro avg": { "f1-score": 0.8736240895637626, "precision": 0.8751202327904755, "recall": 0.872203336865589, "support": 32613.0 }, "eval_runtime": 4.89, "eval_samples_per_second": 16.564, "eval_steps_per_second": 2.249, "eval_weighted avg": { "f1-score": 0.9135835215352088, "precision": 0.9138148255025323, "recall": 0.9133781007573667, "support": 32613.0 }, "step": 1053 }, { "epoch": 14.0, "eval_Claim": { "f1-score": 0.6549993880797944, "precision": 0.7010741419963322, "recall": 0.6146072576940744, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9030425963488844, "precision": 0.8624564122433166, "recall": 0.9476372924648787, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9098839358320396, "precision": 0.8997076023391812, "recall": 0.9202931060266188, "support": 13374.0 }, "eval_accuracy": 0.9120596081317266, "eval_loss": 0.5426023602485657, "eval_macro avg": { "f1-score": 0.866971508384752, "precision": 0.8658095391447076, "recall": 0.8706144714809814, "support": 32613.0 }, "eval_runtime": 4.8976, "eval_samples_per_second": 16.539, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9099868804586297, "precision": 0.9090570140832118, "recall": 0.9120596081317266, "support": 32613.0 }, "step": 1134 }, { "epoch": 15.0, "eval_Claim": { "f1-score": 0.6663435909861886, "precision": 0.7051282051282052, "recall": 0.6316031235645384, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8880537974683544, "precision": 0.8293313631326191, "recall": 0.955725840783312, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9116781523561184, "precision": 0.9083958132284166, "recall": 0.9149842978914311, "support": 13374.0 }, "eval_accuracy": 0.9127341857541471, "eval_loss": 0.6165347099304199, "eval_macro avg": { "f1-score": 0.8665089135222377, "precision": 0.8607138453723102, "recall": 0.8755583729944088, "support": 32613.0 }, "eval_runtime": 4.8787, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.255, "eval_weighted avg": { "f1-score": 0.9111575738225369, "precision": 0.910775248619985, "recall": 0.9127341857541471, "support": 32613.0 }, "step": 1215 }, { "epoch": 16.0, "eval_Claim": { "f1-score": 0.674565560821485, "precision": 0.6630434782608695, "recall": 0.6864951768488746, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9101030927835051, "precision": 0.8824470211915234, "recall": 0.9395487441464453, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9041334190523012, "precision": 0.9146836024179356, "recall": 0.8938238372962465, "support": 13374.0 }, "eval_accuracy": 0.9102198509796707, "eval_loss": 0.6390220522880554, "eval_macro avg": { "f1-score": 0.8721905464838953, "precision": 0.8650435254675821, "recall": 0.8799469970074799, "support": 32613.0 }, "eval_runtime": 4.9309, "eval_samples_per_second": 16.427, "eval_steps_per_second": 2.231, "eval_weighted avg": { "f1-score": 0.9107494233350908, "precision": 0.9115609681986994, "recall": 0.9102198509796707, "support": 32613.0 }, "step": 1296 }, { "epoch": 17.0, "eval_Claim": { "f1-score": 0.6808510638297871, "precision": 0.7280334728033473, "recall": 0.6394120349104272, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9149623250807319, "precision": 0.9255226480836237, "recall": 0.9046402724563644, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9178984340699547, "precision": 0.8986960882647944, "recall": 0.9379392851802004, "support": 13374.0 }, "eval_accuracy": 0.9195106245975532, "eval_loss": 0.5812303423881531, "eval_macro avg": { "f1-score": 0.8784179840646908, "precision": 0.8880630522879414, "recall": 0.8704779555713364, "support": 32613.0 }, "eval_runtime": 4.8888, "eval_samples_per_second": 16.568, "eval_steps_per_second": 2.25, "eval_weighted avg": { "f1-score": 0.9175833462373209, "precision": 0.91678385690944, "recall": 0.9195106245975532, "support": 32613.0 }, "step": 1377 }, { "epoch": 18.0, "eval_Claim": { "f1-score": 0.6756586411193748, "precision": 0.7488125174629785, "recall": 0.6155259531465319, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9198384010206252, "precision": 0.9188615123194562, "recall": 0.9208173690932312, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999202297383536, "precision": 0.9999202297383536, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9191801729776873, "precision": 0.8941600678733032, "recall": 0.9456407955735008, "support": 13374.0 }, "eval_accuracy": 0.9206451415079876, "eval_loss": 0.5839426517486572, "eval_macro avg": { "f1-score": 0.8786493612140102, "precision": 0.8904385818485229, "recall": 0.8704760868879043, "support": 32613.0 }, "eval_runtime": 4.8961, "eval_samples_per_second": 16.544, "eval_steps_per_second": 2.247, "eval_weighted avg": { "f1-score": 0.9177516254510409, "precision": 0.9171873835957982, "recall": 0.9206451415079876, "support": 32613.0 }, "step": 1458 }, { "epoch": 18.52, "grad_norm": 6.301952838897705, "learning_rate": 1.2592592592592593e-05, "loss": 0.0099, "step": 1500 }, { "epoch": 19.0, "eval_Claim": { "f1-score": 0.6969172151021822, "precision": 0.7007197585326214, "recall": 0.6931557188791916, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.93473143590841, "precision": 0.9397590361445783, "recall": 0.929757343550447, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9126430781850042, "precision": 0.9101658362460028, "recall": 0.9151338417825632, "support": 13374.0 }, "eval_accuracy": 0.9191426731671419, "eval_loss": 0.6210956573486328, "eval_macro avg": { "f1-score": 0.8860629606184716, "precision": 0.8876611577308006, "recall": 0.8844917834876388, "support": 32613.0 }, "eval_runtime": 4.8804, "eval_samples_per_second": 16.597, "eval_steps_per_second": 2.254, "eval_weighted avg": { "f1-score": 0.9189970320181106, "precision": 0.9188662710731514, "recall": 0.9191426731671419, "support": 32613.0 }, "step": 1539 }, { "epoch": 20.0, "eval_Claim": { "f1-score": 0.6770349931017183, "precision": 0.7457861287648522, "recall": 0.6198897565457051, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.933418150975403, "precision": 0.9298690325306295, "recall": 0.9369944657300979, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.916624189907522, "precision": 0.8932727788816349, "recall": 0.9412292507851054, "support": 13374.0 }, "eval_accuracy": 0.9205838162695857, "eval_loss": 0.6079531311988831, "eval_macro avg": { "f1-score": 0.8817593618157333, "precision": 0.8922319850442791, "recall": 0.8745084256998155, "support": 32613.0 }, "eval_runtime": 4.8699, "eval_samples_per_second": 16.633, "eval_steps_per_second": 2.259, "eval_weighted avg": { "f1-score": 0.9178806455243597, "precision": 0.9172429800024408, "recall": 0.9205838162695857, "support": 32613.0 }, "step": 1620 }, { "epoch": 21.0, "eval_Claim": { "f1-score": 0.6940774487471526, "precision": 0.6884319927699954, "recall": 0.6998162609095085, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9116146152275975, "precision": 0.875686274509804, "recall": 0.9506172839506173, "support": 2349.0 }, "eval_O": { "f1-score": 0.999960116459937, "precision": 0.9999202361011407, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9093450177532674, "precision": 0.9188549618320611, "recall": 0.9000299087782264, "support": 13374.0 }, "eval_accuracy": 0.9153711710054273, "eval_loss": 0.641860842704773, "eval_macro avg": { "f1-score": 0.8787492995469885, "precision": 0.8707233663032503, "recall": 0.8876158634095881, "support": 32613.0 }, "eval_runtime": 4.8748, "eval_samples_per_second": 16.616, "eval_steps_per_second": 2.257, "eval_weighted avg": { "f1-score": 0.9156004118108944, "precision": 0.916143448767362, "recall": 0.9153711710054273, "support": 32613.0 }, "step": 1701 }, { "epoch": 22.0, "eval_Claim": { "f1-score": 0.6900875325289804, "precision": 0.7114634146341463, "recall": 0.6699586587046394, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9227146221365875, "precision": 0.9280792420327304, "recall": 0.9174116645381013, "support": 2349.0 }, "eval_O": { "f1-score": 0.999960116459937, "precision": 0.9999202361011407, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9152730501701939, "precision": 0.9058883843562326, "recall": 0.9248541947061463, "support": 13374.0 }, "eval_accuracy": 0.9191733357863429, "eval_loss": 0.6659213304519653, "eval_macro avg": { "f1-score": 0.8820088303239246, "precision": 0.8863378192810625, "recall": 0.8780561294872218, "support": 32613.0 }, "eval_runtime": 4.8738, "eval_samples_per_second": 16.62, "eval_steps_per_second": 2.257, "eval_weighted avg": { "f1-score": 0.9182982110492126, "precision": 0.91767458311091, "recall": 0.9191733357863429, "support": 32613.0 }, "step": 1782 }, { "epoch": 23.0, "eval_Claim": { "f1-score": 0.6811174220470616, "precision": 0.6875731336297684, "recall": 0.6747818098300413, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9136502398604448, "precision": 0.9365221278497988, "recall": 0.8918688803746275, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9115136218543538, "precision": 0.9049970518867925, "recall": 0.9181247196052041, "support": 13374.0 }, "eval_accuracy": 0.9151871952902217, "eval_loss": 0.6351564526557922, "eval_macro avg": { "f1-score": 0.8765603492600376, "precision": 0.88227307834159, "recall": 0.8711739098870567, "support": 32613.0 }, "eval_runtime": 4.8784, "eval_samples_per_second": 16.604, "eval_steps_per_second": 2.255, "eval_weighted avg": { "f1-score": 0.9149061057787342, "precision": 0.9147583624345246, "recall": 0.9151871952902217, "support": 32613.0 }, "step": 1863 }, { "epoch": 24.0, "eval_Claim": { "f1-score": 0.6994498708880655, "precision": 0.6841642872831101, "recall": 0.7154340836012861, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9161639276055753, "precision": 0.8958502847843776, "recall": 0.9374201787994891, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9120683786543625, "precision": 0.9227825820769878, "recall": 0.9016001196351129, "support": 13374.0 }, "eval_accuracy": 0.9171189402998804, "eval_loss": 0.6426755785942078, "eval_macro avg": { "f1-score": 0.8819105726065732, "precision": 0.8756992885361189, "recall": 0.8885936529435605, "support": 32613.0 }, "eval_runtime": 4.8958, "eval_samples_per_second": 16.545, "eval_steps_per_second": 2.247, "eval_weighted avg": { "f1-score": 0.9177621279849207, "precision": 0.9186673375183761, "recall": 0.9171189402998804, "support": 32613.0 }, "step": 1944 }, { "epoch": 24.69, "grad_norm": 0.09022147953510284, "learning_rate": 1.0123456790123458e-05, "loss": 0.0049, "step": 2000 }, { "epoch": 25.0, "eval_Claim": { "f1-score": 0.7043327556325824, "precision": 0.708672401767031, "recall": 0.7000459347726229, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9363867684478372, "precision": 0.9328263624841572, "recall": 0.9399744572158365, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9144606653474219, "precision": 0.9132672085912447, "recall": 0.9156572454015254, "support": 13374.0 }, "eval_accuracy": 0.9210437555575998, "eval_loss": 0.6521892547607422, "eval_macro avg": { "f1-score": 0.8887950473569604, "precision": 0.8886914932106081, "recall": 0.8889194093474961, "support": 32613.0 }, "eval_runtime": 4.8719, "eval_samples_per_second": 16.626, "eval_steps_per_second": 2.258, "eval_weighted avg": { "f1-score": 0.9208669633417549, "precision": 0.9207004694590575, "recall": 0.9210437555575998, "support": 32613.0 }, "step": 2025 }, { "epoch": 26.0, "eval_Claim": { "f1-score": 0.70013357079252, "precision": 0.6792656587473002, "recall": 0.7223242994947175, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9349714043634824, "precision": 0.9304384485666105, "recall": 0.9395487441464453, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9075579416991192, "precision": 0.9179349904397706, "recall": 0.8974128906834156, "support": 13374.0 }, "eval_accuracy": 0.9165056879158617, "eval_loss": 0.6482347846031189, "eval_macro avg": { "f1-score": 0.8856657292137804, "precision": 0.8819097744384202, "recall": 0.8898214835811445, "support": 32613.0 }, "eval_runtime": 4.9087, "eval_samples_per_second": 16.501, "eval_steps_per_second": 2.241, "eval_weighted avg": { "f1-score": 0.9173737254580834, "precision": 0.9185167005798364, "recall": 0.9165056879158617, "support": 32613.0 }, "step": 2106 }, { "epoch": 27.0, "eval_Claim": { "f1-score": 0.6918882072256305, "precision": 0.6845773381294964, "recall": 0.6993569131832797, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9247223364870424, "precision": 0.8945483485873458, "recall": 0.9570029799914858, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9091732729331824, "precision": 0.9181152790484904, "recall": 0.9004037685060565, "support": 13374.0 }, "eval_accuracy": 0.9159230981510441, "eval_loss": 0.6850813627243042, "eval_macro avg": { "f1-score": 0.8814459541614638, "precision": 0.8743102414413331, "recall": 0.8891909154202055, "support": 32613.0 }, "eval_runtime": 4.8749, "eval_samples_per_second": 16.616, "eval_steps_per_second": 2.256, "eval_weighted avg": { "f1-score": 0.9161971414735486, "precision": 0.9167147316420451, "recall": 0.9159230981510441, "support": 32613.0 }, "step": 2187 }, { "epoch": 28.0, "eval_Claim": { "f1-score": 0.6900420757363254, "precision": 0.7025226082817706, "recall": 0.6779972439136426, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9335321687260333, "precision": 0.9343283582089552, "recall": 0.9327373350361856, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9118346714243236, "precision": 0.9065779748706578, "recall": 0.9171526843128458, "support": 13374.0 }, "eval_accuracy": 0.9181921319719131, "eval_loss": 0.6860224604606628, "eval_macro avg": { "f1-score": 0.8838522289716705, "precision": 0.8858572353403459, "recall": 0.8819718158156685, "support": 32613.0 }, "eval_runtime": 4.8824, "eval_samples_per_second": 16.59, "eval_steps_per_second": 2.253, "eval_weighted avg": { "f1-score": 0.9176766061914672, "precision": 0.9172444910254145, "recall": 0.9181921319719131, "support": 32613.0 }, "step": 2268 }, { "epoch": 29.0, "eval_Claim": { "f1-score": 0.702871276481342, "precision": 0.6608695652173913, "recall": 0.7505741846577859, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9320307628351693, "precision": 0.9106417546709992, "recall": 0.9544487015751384, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9048953733922058, "precision": 0.9299976323889195, "recall": 0.8811126065500224, "support": 13374.0 }, "eval_accuracy": 0.9146352681446049, "eval_loss": 0.7058882713317871, "eval_macro avg": { "f1-score": 0.8849393814967517, "precision": 0.8753772380693275, "recall": 0.8965139306303251, "support": 32613.0 }, "eval_runtime": 4.8777, "eval_samples_per_second": 16.606, "eval_steps_per_second": 2.255, "eval_weighted avg": { "f1-score": 0.9164202159722681, "precision": 0.9195815136371419, "recall": 0.9146352681446049, "support": 32613.0 }, "step": 2349 }, { "epoch": 30.0, "eval_Claim": { "f1-score": 0.6935711954748178, "precision": 0.658813804505063, "recall": 0.7322002756086358, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9355112431056427, "precision": 0.9323467230443975, "recall": 0.9386973180076629, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9033831149039927, "precision": 0.9209258971570607, "recall": 0.8864961866307761, "support": 13374.0 }, "eval_accuracy": 0.913255450280563, "eval_loss": 0.6731629967689514, "eval_macro avg": { "f1-score": 0.8831064166906858, "precision": 0.8780216061766303, "recall": 0.8893285024963571, "support": 32613.0 }, "eval_runtime": 4.9031, "eval_samples_per_second": 16.52, "eval_steps_per_second": 2.243, "eval_weighted avg": { "f1-score": 0.9148091452468388, "precision": 0.9171502378139044, "recall": 0.913255450280563, "support": 32613.0 }, "step": 2430 }, { "epoch": 30.86, "grad_norm": 0.004342585802078247, "learning_rate": 7.654320987654322e-06, "loss": 0.0026, "step": 2500 }, { "epoch": 31.0, "eval_Claim": { "f1-score": 0.7007920557985577, "precision": 0.7220462850182704, "recall": 0.6807533302710151, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9436951402269322, "precision": 0.9491817398794143, "recall": 0.9382716049382716, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9151526364477336, "precision": 0.9058677019998534, "recall": 0.9246298788694481, "support": 13374.0 }, "eval_accuracy": 0.9219942967528286, "eval_loss": 0.6748707294464111, "eval_macro avg": { "f1-score": 0.8898999864378783, "precision": 0.8942739317243846, "recall": 0.8858937609542722, "support": 32613.0 }, "eval_runtime": 4.8881, "eval_samples_per_second": 16.571, "eval_steps_per_second": 2.25, "eval_weighted avg": { "f1-score": 0.9211890913209031, "precision": 0.9206295673042141, "recall": 0.9219942967528286, "support": 32613.0 }, "step": 2511 }, { "epoch": 32.0, "eval_Claim": { "f1-score": 0.7010334054313868, "precision": 0.7351310483870968, "recall": 0.6699586587046394, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9412010008340285, "precision": 0.9223539027380466, "recall": 0.9608343976160069, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9177053667196804, "precision": 0.9079997072385274, "recall": 0.9276207566920891, "support": 13374.0 }, "eval_accuracy": 0.9234047772360715, "eval_loss": 0.7106999754905701, "eval_macro avg": { "f1-score": 0.8899749715658463, "precision": 0.8913711645909177, "recall": 0.8895835106877722, "support": 32613.0 }, "eval_runtime": 4.8986, "eval_samples_per_second": 16.535, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9220884970034353, "precision": 0.9213184308961812, "recall": 0.9234047772360715, "support": 32613.0 }, "step": 2592 }, { "epoch": 33.0, "eval_Claim": { "f1-score": 0.711044912923923, "precision": 0.709419295839049, "recall": 0.7126779972439137, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9253022452504318, "precision": 0.938677179150241, "recall": 0.9123031077054066, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9179324500839708, "precision": 0.916325162059459, "recall": 0.9195453865709585, "support": 13374.0 }, "eval_accuracy": 0.9223009229448379, "eval_loss": 0.7237989902496338, "eval_macro avg": { "f1-score": 0.8885599303841538, "precision": 0.8911054092621873, "recall": 0.8861116803146581, "support": 32613.0 }, "eval_runtime": 4.8767, "eval_samples_per_second": 16.609, "eval_steps_per_second": 2.256, "eval_weighted avg": { "f1-score": 0.9223730749223835, "precision": 0.9224756086618937, "recall": 0.9223009229448379, "support": 32613.0 }, "step": 2673 }, { "epoch": 34.0, "eval_Claim": { "f1-score": 0.7148519468407553, "precision": 0.7258522727272727, "recall": 0.7041800643086816, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9396058486967578, "precision": 0.9354430379746835, "recall": 0.9438058748403576, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9186834462729914, "precision": 0.914936220706022, "recall": 0.9224614924480335, "support": 13374.0 }, "eval_accuracy": 0.9246312820041088, "eval_loss": 0.6951220035552979, "eval_macro avg": { "f1-score": 0.8932753387721986, "precision": 0.8940578828519945, "recall": 0.8925919153338566, "support": 32613.0 }, "eval_runtime": 4.9327, "eval_samples_per_second": 16.421, "eval_steps_per_second": 2.23, "eval_weighted avg": { "f1-score": 0.9242195414603059, "precision": 0.9238669704528689, "recall": 0.9246312820041088, "support": 32613.0 }, "step": 2754 }, { "epoch": 35.0, "eval_Claim": { "f1-score": 0.7058540110816671, "precision": 0.7421479229989868, "recall": 0.6729444189251264, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.939966414777498, "precision": 0.9271221532091097, "recall": 0.9531715623669647, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9197091070176086, "precision": 0.9082756106452788, "recall": 0.9314341259159563, "support": 13374.0 }, "eval_accuracy": 0.9248152577193144, "eval_loss": 0.7126197814941406, "eval_macro avg": { "f1-score": 0.8913724115387659, "precision": 0.8943864217133439, "recall": 0.8893675842366002, "support": 32613.0 }, "eval_runtime": 4.9139, "eval_samples_per_second": 16.484, "eval_steps_per_second": 2.239, "eval_weighted avg": { "f1-score": 0.923464846836294, "precision": 0.9227118023915538, "recall": 0.9248152577193144, "support": 32613.0 }, "step": 2835 }, { "epoch": 36.0, "eval_Claim": { "f1-score": 0.7094183778822989, "precision": 0.7086866834746733, "recall": 0.7101515847496555, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9417721518987342, "precision": 0.9335006273525721, "recall": 0.9501915708812261, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9147501685519513, "precision": 0.9164665265685981, "recall": 0.9130402273067145, "support": 13374.0 }, "eval_accuracy": 0.9220249593720296, "eval_loss": 0.7317129373550415, "eval_macro avg": { "f1-score": 0.8914752029028185, "precision": 0.889663459348961, "recall": 0.8933259031689874, "support": 32613.0 }, "eval_runtime": 4.9011, "eval_samples_per_second": 16.527, "eval_steps_per_second": 2.244, "eval_weighted avg": { "f1-score": 0.9220371979388616, "precision": 0.9220629233688514, "recall": 0.9220249593720296, "support": 32613.0 }, "step": 2916 }, { "epoch": 37.0, "eval_Claim": { "f1-score": 0.7016791711325474, "precision": 0.7286668315607222, "recall": 0.6766192007349564, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9388185654008439, "precision": 0.9305729820158929, "recall": 0.9472115793954874, "support": 2349.0 }, "eval_O": { "f1-score": 0.999960116459937, "precision": 0.9999202361011407, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9166419899318923, "precision": 0.9076381762204955, "recall": 0.9258262299985046, "support": 13374.0 }, "eval_accuracy": 0.9226075491368473, "eval_loss": 0.741832971572876, "eval_macro avg": { "f1-score": 0.8892749607313051, "precision": 0.8916995564745628, "recall": 0.887414252532237, "support": 32613.0 }, "eval_runtime": 4.8986, "eval_samples_per_second": 16.535, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9215670411961056, "precision": 0.9208684993072555, "recall": 0.9226075491368473, "support": 32613.0 }, "step": 2997 }, { "epoch": 37.04, "grad_norm": 0.02783563733100891, "learning_rate": 5.185185185185185e-06, "loss": 0.001, "step": 3000 }, { "epoch": 38.0, "eval_Claim": { "f1-score": 0.7192414431082331, "precision": 0.7242664182580345, "recall": 0.7142857142857143, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.940041928721174, "precision": 0.9260636100784799, "recall": 0.9544487015751384, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9176391382405745, "precision": 0.9180511899416255, "recall": 0.9172274562584118, "support": 13374.0 }, "eval_accuracy": 0.9246312820041088, "eval_loss": 0.7159741520881653, "eval_macro avg": { "f1-score": 0.8942306275174954, "precision": 0.892095304569535, "recall": 0.8964904680298161, "support": 32613.0 }, "eval_runtime": 4.8897, "eval_samples_per_second": 16.565, "eval_steps_per_second": 2.25, "eval_weighted avg": { "f1-score": 0.9244240507984156, "precision": 0.9242570759957419, "recall": 0.9246312820041088, "support": 32613.0 }, "step": 3078 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 5447002986961200.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }