{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.0, "eval_steps": 500, "global_step": 1539, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.46243291592128805, "precision": 0.6588785046728972, "recall": 0.3562241616903996, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8342621259029929, "precision": 0.8096955128205128, "recall": 0.8603661132396765, "support": 2349.0 }, "eval_O": { "f1-score": 0.9993622448979592, "precision": 0.9987253027405991, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.8941052999825083, "precision": 0.8401157057392676, "recall": 0.955510692388216, "support": 13374.0 }, "eval_accuracy": 0.8857510808573268, "eval_loss": 0.2754688858985901, "eval_macro avg": { "f1-score": 0.7975406466761871, "precision": 0.8268537564933192, "recall": 0.793025241829573, "support": 32613.0 }, "eval_runtime": 4.8781, "eval_samples_per_second": 16.605, "eval_steps_per_second": 2.255, "eval_weighted avg": { "f1-score": 0.8726239240080429, "precision": 0.8746959682542021, "recall": 0.8857510808573268, "support": 32613.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.6446312571646924, "precision": 0.5518482172064115, "recall": 0.77491961414791, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8912341158733577, "precision": 0.9019180470793374, "recall": 0.8808003405704555, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998005504806733, "precision": 0.9999202106439001, "recall": 0.9996809189534142, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.8690409646250897, "precision": 0.9324023303632625, "recall": 0.8137430835950351, "support": 13374.0 }, "eval_accuracy": 0.8848618649004998, "eval_loss": 0.25178349018096924, "eval_macro avg": { "f1-score": 0.8511767220359533, "precision": 0.8465222013232279, "recall": 0.8672859893167036, "support": 32613.0 }, "eval_runtime": 4.907, "eval_samples_per_second": 16.507, "eval_steps_per_second": 2.242, "eval_weighted avg": { "f1-score": 0.8909418634778542, "precision": 0.9053537287957648, "recall": 0.8848618649004998, "support": 32613.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.6598764019226366, "precision": 0.6576186131386861, "recall": 0.6621497473587505, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9062119366626066, "precision": 0.8661233993015134, "recall": 0.9501915708812261, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998404722022812, "precision": 0.9997607273887382, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9007852612503775, "precision": 0.9097148086014946, "recall": 0.8920293106026619, "support": 13374.0 }, "eval_accuracy": 0.9070002759635728, "eval_loss": 0.23778581619262695, "eval_macro avg": { "f1-score": 0.8666785180094754, "precision": 0.8583043871076081, "recall": 0.876072714645248, "support": 32613.0 }, "eval_runtime": 4.9168, "eval_samples_per_second": 16.474, "eval_steps_per_second": 2.237, "eval_weighted avg": { "f1-score": 0.907089072967282, "precision": 0.9075314026721463, "recall": 0.9070002759635728, "support": 32613.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6604639922667954, "precision": 0.696838347781744, "recall": 0.6276986678915939, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9140214420853479, "precision": 0.9028239202657807, "recall": 0.9255002128565347, "support": 2349.0 }, "eval_O": { "f1-score": 0.9994417862838917, "precision": 0.9991230867346939, "recall": 0.9997606892150607, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9084203149780549, "precision": 0.8963534463934785, "recall": 0.9208165096455809, "support": 13374.0 }, "eval_accuracy": 0.912366234323736, "eval_loss": 0.2598011791706085, "eval_macro avg": { "f1-score": 0.8705868839035225, "precision": 0.8737847002939243, "recall": 0.8684440199021926, "support": 32613.0 }, "eval_runtime": 4.8872, "eval_samples_per_second": 16.574, "eval_steps_per_second": 2.251, "eval_weighted avg": { "f1-score": 0.9107077581074873, "precision": 0.9096864919608297, "recall": 0.912366234323736, "support": 32613.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6731001206272618, "precision": 0.6440713536201469, "recall": 0.7048690858980248, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8871466886734063, "precision": 0.8606885508406725, "recall": 0.9152830991911451, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998005345673595, "precision": 1.0, "recall": 0.9996011486917677, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9039056236399038, "precision": 0.9234729698104376, "recall": 0.8851502916105877, "support": 13374.0 }, "eval_accuracy": 0.9072455769171802, "eval_loss": 0.3166182339191437, "eval_macro avg": { "f1-score": 0.8659882418769828, "precision": 0.8570582185678143, "recall": 0.8762259063478812, "support": 32613.0 }, "eval_runtime": 4.9018, "eval_samples_per_second": 16.525, "eval_steps_per_second": 2.244, "eval_weighted avg": { "f1-score": 0.9087455557232214, "precision": 0.9110652677714914, "recall": 0.9072455769171802, "support": 32613.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.634920634920635, "precision": 0.7171775592828225, "recall": 0.5695911805236564, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9061357171881348, "precision": 0.8666925767586475, "recall": 0.9493401447424435, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998404722022812, "precision": 0.9997607273887382, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9096214165876431, "precision": 0.8879236684705212, "recall": 0.9324061612083147, "support": 13374.0 }, "eval_accuracy": 0.9111397295556987, "eval_loss": 0.32181796431541443, "eval_macro avg": { "f1-score": 0.8626295602246735, "precision": 0.8678886329751823, "recall": 0.8628144290531921, "support": 32613.0 }, "eval_runtime": 4.8856, "eval_samples_per_second": 16.579, "eval_steps_per_second": 2.252, "eval_weighted avg": { "f1-score": 0.907375409471386, "precision": 0.9065876667768511, "recall": 0.9111397295556987, "support": 32613.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 7.612313747406006, "learning_rate": 1.7530864197530865e-05, "loss": 0.2088, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6538414409998775, "precision": 0.7008142894667717, "recall": 0.6127698667891593, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8866711022399646, "precision": 0.9254629629629629, "recall": 0.8510004257130694, "support": 2349.0 }, "eval_O": { "f1-score": 0.999880349379811, "precision": 0.9998404722022812, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9127824473310774, "precision": 0.8890070167977886, "recall": 0.9378645132346344, "support": 13374.0 }, "eval_accuracy": 0.9120596081317266, "eval_loss": 0.4053190350532532, "eval_macro avg": { "f1-score": 0.8632938349876826, "precision": 0.8787811853574512, "recall": 0.8503887588688042, "support": 32613.0 }, "eval_runtime": 4.9061, "eval_samples_per_second": 16.51, "eval_steps_per_second": 2.242, "eval_weighted avg": { "f1-score": 0.909811074225195, "precision": 0.9091110268457285, "recall": 0.9120596081317266, "support": 32613.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.668355545764952, "precision": 0.6421164021164021, "recall": 0.6968305006890216, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8960263537162857, "precision": 0.8676236044657097, "recall": 0.9263516389953171, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9012129071630178, "precision": 0.9198069137340392, "recall": 0.8833557649170032, "support": 13374.0 }, "eval_accuracy": 0.9063870235795541, "eval_loss": 0.4514281749725342, "eval_macro avg": { "f1-score": 0.8663987016610639, "precision": 0.8573867300790377, "recall": 0.8766344761503355, "support": 32613.0 }, "eval_runtime": 4.8816, "eval_samples_per_second": 16.593, "eval_steps_per_second": 2.253, "eval_weighted avg": { "f1-score": 0.9077241398073883, "precision": 0.9098003963445191, "recall": 0.9063870235795541, "support": 32613.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6913841807909605, "precision": 0.7090777402221149, "recall": 0.674552135966927, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9055304497525284, "precision": 0.9155787641427328, "recall": 0.8957002979991485, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998404340194671, "precision": 1.0, "recall": 0.9996809189534142, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9162317231167869, "precision": 0.9072648632798181, "recall": 0.9253775983251085, "support": 13374.0 }, "eval_accuracy": 0.9183147824487168, "eval_loss": 0.4471658766269684, "eval_macro avg": { "f1-score": 0.8782466969199356, "precision": 0.8829803419111664, "recall": 0.8738277378111496, "support": 32613.0 }, "eval_runtime": 4.8813, "eval_samples_per_second": 16.594, "eval_steps_per_second": 2.253, "eval_weighted avg": { "f1-score": 0.9175807345372852, "precision": 0.9170508471898524, "recall": 0.9183147824487168, "support": 32613.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6829727187206021, "precision": 0.6997590361445784, "recall": 0.6669728984841525, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.908567906307787, "precision": 0.8780778395552026, "recall": 0.9412515964240102, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999202233745512, "precision": 1.0, "recall": 0.9998404594767071, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9131976852716073, "precision": 0.9119379613749907, "recall": 0.914460894272469, "support": 13374.0 }, "eval_accuracy": 0.9161683991046515, "eval_loss": 0.45130759477615356, "eval_macro avg": { "f1-score": 0.8761646334186369, "precision": 0.8724437092686929, "recall": 0.8806314621643347, "support": 32613.0 }, "eval_runtime": 4.8986, "eval_samples_per_second": 16.535, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9154630053129839, "precision": 0.9150220459300829, "recall": 0.9161683991046515, "support": 32613.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6209771986970684, "precision": 0.717554953327311, "recall": 0.5473128158015618, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8710538016896399, "precision": 0.9115867845509539, "recall": 0.8339719029374202, "support": 2349.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9144776812837283, "precision": 0.8758814267132197, "recall": 0.9566322715717063, "support": 13374.0 }, "eval_accuracy": 0.9098212369300586, "eval_loss": 0.5217961668968201, "eval_macro avg": { "f1-score": 0.8516271704176092, "precision": 0.8762557911478711, "recall": 0.8344792475776721, "support": 32613.0 }, "eval_runtime": 4.8915, "eval_samples_per_second": 16.559, "eval_steps_per_second": 2.249, "eval_weighted avg": { "f1-score": 0.9050398495319223, "precision": 0.9050252912813879, "recall": 0.9098212369300586, "support": 32613.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.6767220902612826, "precision": 0.7006886374815543, "recall": 0.6543408360128617, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9093525179856115, "precision": 0.8791732909379968, "recall": 0.9416773094934014, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999202233745512, "precision": 1.0, "recall": 0.9998404594767071, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9131033456142309, "precision": 0.9089427280136326, "recall": 0.9173022282039779, "support": 13374.0 }, "eval_accuracy": 0.9156777971974366, "eval_loss": 0.48604533076286316, "eval_macro avg": { "f1-score": 0.8747745443089191, "precision": 0.872201164108296, "recall": 0.878290208296737, "support": 32613.0 }, "eval_runtime": 4.8984, "eval_samples_per_second": 16.536, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9146463407295841, "precision": 0.9139967630227935, "recall": 0.9156777971974366, "support": 32613.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 1.4397014379501343, "learning_rate": 1.506172839506173e-05, "loss": 0.0253, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { "f1-score": 0.6805793134907059, "precision": 0.6758776896942242, "recall": 0.6853468075333027, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.903767491926803, "precision": 0.914198606271777, "recall": 0.8935717326521925, "support": 2349.0 }, "eval_O": { "f1-score": 0.9998005982053838, "precision": 0.9996809952946806, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9103489546321576, "precision": 0.9107236399012197, "recall": 0.9099745775385075, "support": 13374.0 }, "eval_accuracy": 0.9133781007573667, "eval_loss": 0.5282083749771118, "eval_macro avg": { "f1-score": 0.8736240895637626, "precision": 0.8751202327904755, "recall": 0.872203336865589, "support": 32613.0 }, "eval_runtime": 4.89, "eval_samples_per_second": 16.564, "eval_steps_per_second": 2.249, "eval_weighted avg": { "f1-score": 0.9135835215352088, "precision": 0.9138148255025323, "recall": 0.9133781007573667, "support": 32613.0 }, "step": 1053 }, { "epoch": 14.0, "eval_Claim": { "f1-score": 0.6549993880797944, "precision": 0.7010741419963322, "recall": 0.6146072576940744, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9030425963488844, "precision": 0.8624564122433166, "recall": 0.9476372924648787, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9098839358320396, "precision": 0.8997076023391812, "recall": 0.9202931060266188, "support": 13374.0 }, "eval_accuracy": 0.9120596081317266, "eval_loss": 0.5426023602485657, "eval_macro avg": { "f1-score": 0.866971508384752, "precision": 0.8658095391447076, "recall": 0.8706144714809814, "support": 32613.0 }, "eval_runtime": 4.8976, "eval_samples_per_second": 16.539, "eval_steps_per_second": 2.246, "eval_weighted avg": { "f1-score": 0.9099868804586297, "precision": 0.9090570140832118, "recall": 0.9120596081317266, "support": 32613.0 }, "step": 1134 }, { "epoch": 15.0, "eval_Claim": { "f1-score": 0.6663435909861886, "precision": 0.7051282051282052, "recall": 0.6316031235645384, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.8880537974683544, "precision": 0.8293313631326191, "recall": 0.955725840783312, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9116781523561184, "precision": 0.9083958132284166, "recall": 0.9149842978914311, "support": 13374.0 }, "eval_accuracy": 0.9127341857541471, "eval_loss": 0.6165347099304199, "eval_macro avg": { "f1-score": 0.8665089135222377, "precision": 0.8607138453723102, "recall": 0.8755583729944088, "support": 32613.0 }, "eval_runtime": 4.8787, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.255, "eval_weighted avg": { "f1-score": 0.9111575738225369, "precision": 0.910775248619985, "recall": 0.9127341857541471, "support": 32613.0 }, "step": 1215 }, { "epoch": 16.0, "eval_Claim": { "f1-score": 0.674565560821485, "precision": 0.6630434782608695, "recall": 0.6864951768488746, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9101030927835051, "precision": 0.8824470211915234, "recall": 0.9395487441464453, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9041334190523012, "precision": 0.9146836024179356, "recall": 0.8938238372962465, "support": 13374.0 }, "eval_accuracy": 0.9102198509796707, "eval_loss": 0.6390220522880554, "eval_macro avg": { "f1-score": 0.8721905464838953, "precision": 0.8650435254675821, "recall": 0.8799469970074799, "support": 32613.0 }, "eval_runtime": 4.9309, "eval_samples_per_second": 16.427, "eval_steps_per_second": 2.231, "eval_weighted avg": { "f1-score": 0.9107494233350908, "precision": 0.9115609681986994, "recall": 0.9102198509796707, "support": 32613.0 }, "step": 1296 }, { "epoch": 17.0, "eval_Claim": { "f1-score": 0.6808510638297871, "precision": 0.7280334728033473, "recall": 0.6394120349104272, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9149623250807319, "precision": 0.9255226480836237, "recall": 0.9046402724563644, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9178984340699547, "precision": 0.8986960882647944, "recall": 0.9379392851802004, "support": 13374.0 }, "eval_accuracy": 0.9195106245975532, "eval_loss": 0.5812303423881531, "eval_macro avg": { "f1-score": 0.8784179840646908, "precision": 0.8880630522879414, "recall": 0.8704779555713364, "support": 32613.0 }, "eval_runtime": 4.8888, "eval_samples_per_second": 16.568, "eval_steps_per_second": 2.25, "eval_weighted avg": { "f1-score": 0.9175833462373209, "precision": 0.91678385690944, "recall": 0.9195106245975532, "support": 32613.0 }, "step": 1377 }, { "epoch": 18.0, "eval_Claim": { "f1-score": 0.6756586411193748, "precision": 0.7488125174629785, "recall": 0.6155259531465319, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.9198384010206252, "precision": 0.9188615123194562, "recall": 0.9208173690932312, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999202297383536, "precision": 0.9999202297383536, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9191801729776873, "precision": 0.8941600678733032, "recall": 0.9456407955735008, "support": 13374.0 }, "eval_accuracy": 0.9206451415079876, "eval_loss": 0.5839426517486572, "eval_macro avg": { "f1-score": 0.8786493612140102, "precision": 0.8904385818485229, "recall": 0.8704760868879043, "support": 32613.0 }, "eval_runtime": 4.8961, "eval_samples_per_second": 16.544, "eval_steps_per_second": 2.247, "eval_weighted avg": { "f1-score": 0.9177516254510409, "precision": 0.9171873835957982, "recall": 0.9206451415079876, "support": 32613.0 }, "step": 1458 }, { "epoch": 18.52, "grad_norm": 6.301952838897705, "learning_rate": 1.2592592592592593e-05, "loss": 0.0099, "step": 1500 }, { "epoch": 19.0, "eval_Claim": { "f1-score": 0.6969172151021822, "precision": 0.7007197585326214, "recall": 0.6931557188791916, "support": 4354.0 }, "eval_MajorClaim": { "f1-score": 0.93473143590841, "precision": 0.9397590361445783, "recall": 0.929757343550447, "support": 2349.0 }, "eval_O": { "f1-score": 0.9999601132782897, "precision": 1.0, "recall": 0.9999202297383536, "support": 12536.0 }, "eval_Premise": { "f1-score": 0.9126430781850042, "precision": 0.9101658362460028, "recall": 0.9151338417825632, "support": 13374.0 }, "eval_accuracy": 0.9191426731671419, "eval_loss": 0.6210956573486328, "eval_macro avg": { "f1-score": 0.8860629606184716, "precision": 0.8876611577308006, "recall": 0.8844917834876388, "support": 32613.0 }, "eval_runtime": 4.8804, "eval_samples_per_second": 16.597, "eval_steps_per_second": 2.254, "eval_weighted avg": { "f1-score": 0.9189970320181106, "precision": 0.9188662710731514, "recall": 0.9191426731671419, "support": 32613.0 }, "step": 1539 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 2723501493480600.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }