{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 1620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.5724454649827784, "precision": 0.6002889477486154, "recall": 0.5470704410796576, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7860447185813415, "precision": 0.698526892771497, "recall": 0.8986337593653592, "support": 2269.0 }, "eval_O": { "f1-score": 0.9987820830889982, "precision": 0.9975671292124707, "recall": 1.0, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.8965445389824622, "precision": 0.9051258677512096, "recall": 0.8881243979633962, "support": 14534.0 }, "eval_accuracy": 0.8791279948197712, "eval_loss": 0.3157936632633209, "eval_macro avg": { "f1-score": 0.8134542014088951, "precision": 0.8003772093709481, "recall": 0.8334571496021033, "support": 32431.0 }, "eval_runtime": 4.8184, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.075, "eval_weighted avg": { "f1-score": 0.8781740995293698, "precision": 0.8793943851252293, "recall": 0.8791279948197712, "support": 32431.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.6090644692195831, "precision": 0.680108254397835, "recall": 0.5514592933947773, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8585858585858586, "precision": 0.8960229995208433, "recall": 0.8241516086381666, "support": 2269.0 }, "eval_O": { "f1-score": 0.999458141425088, "precision": 0.9992776523702032, "recall": 0.9996386956914461, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9105885478942475, "precision": 0.8801849235905997, "recall": 0.9431677445988716, "support": 14534.0 }, "eval_accuracy": 0.8990780426135487, "eval_loss": 0.24451124668121338, "eval_macro avg": { "f1-score": 0.8444242542811943, "precision": 0.8638984574698704, "recall": 0.8296043355808155, "support": 32431.0 }, "eval_runtime": 4.8529, "eval_samples_per_second": 16.485, "eval_steps_per_second": 2.061, "eval_weighted avg": { "f1-score": 0.8949194640367888, "precision": 0.8938342965082798, "recall": 0.8990780426135487, "support": 32431.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.6542320859402667, "precision": 0.7079713847726111, "recall": 0.60807548825982, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8915187376725839, "precision": 0.8866608544027899, "recall": 0.8964301454385192, "support": 2269.0 }, "eval_O": { "f1-score": 0.9995936611133684, "precision": 0.9992778479870013, "recall": 0.9999096739228616, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9157990498332153, "precision": 0.8973258501155497, "recall": 0.935048850970139, "support": 14534.0 }, "eval_accuracy": 0.9085442940396534, "eval_loss": 0.22665414214134216, "eval_macro avg": { "f1-score": 0.8652858836398586, "precision": 0.872808984319488, "recall": 0.8598660396478349, "support": 32431.0 }, "eval_runtime": 4.8494, "eval_samples_per_second": 16.497, "eval_steps_per_second": 2.062, "eval_weighted avg": { "f1-score": 0.9059516032151627, "precision": 0.9047762338408377, "recall": 0.9085442940396534, "support": 32431.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6861642294713161, "precision": 0.6424741478360781, "recall": 0.7362299758613122, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8995515695067264, "precision": 0.9155636695572797, "recall": 0.884089907448215, "support": 2269.0 }, "eval_O": { "f1-score": 0.9976481230212574, "precision": 0.9990941208442794, "recall": 0.9962063047601842, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9062532879739067, "precision": 0.9242435081193219, "recall": 0.8889500481629283, "support": 14534.0 }, "eval_accuracy": 0.9037649162838025, "eval_loss": 0.2452327460050583, "eval_macro avg": { "f1-score": 0.8724043024933017, "precision": 0.8703438615892398, "recall": 0.8763690590581599, "support": 32431.0 }, "eval_runtime": 4.8393, "eval_samples_per_second": 16.531, "eval_steps_per_second": 2.066, "eval_weighted avg": { "f1-score": 0.9060584182508294, "precision": 0.9095955973231696, "recall": 0.9037649162838025, "support": 32431.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6640249759846301, "precision": 0.7332272606735614, "recall": 0.6067588325652842, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8848007008322383, "precision": 0.8794079233783195, "recall": 0.8902600264433671, "support": 2269.0 }, "eval_O": { "f1-score": 0.999367774566474, "precision": 0.9992775219001174, "recall": 0.9994580435371692, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9201314377682404, "precision": 0.8973839110529758, "recall": 0.9440621989816981, "support": 14534.0 }, "eval_accuracy": 0.9118127717307515, "eval_loss": 0.28411003947257996, "eval_macro avg": { "f1-score": 0.8670812222878956, "precision": 0.8773241542512435, "recall": 0.8601347753818797, "support": 32431.0 }, "eval_runtime": 4.8228, "eval_samples_per_second": 16.588, "eval_steps_per_second": 2.073, "eval_weighted avg": { "f1-score": 0.9087220731552942, "precision": 0.9078434958291443, "recall": 0.9118127717307515, "support": 32431.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.6496087442553721, "precision": 0.7484258729250143, "recall": 0.5738424402018872, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8865800865800866, "precision": 0.8711186729051468, "recall": 0.9026002644336713, "support": 2269.0 }, "eval_O": { "f1-score": 0.9986892655367232, "precision": 0.9994572100597069, "recall": 0.9979225002258152, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9191778088205947, "precision": 0.889647180015452, "recall": 0.9507362047612495, "support": 14534.0 }, "eval_accuracy": 0.9105177145323918, "eval_loss": 0.3082066774368286, "eval_macro avg": { "f1-score": 0.8635139762981943, "precision": 0.87716223397633, "recall": 0.8562753524056559, "support": 32431.0 }, "eval_runtime": 4.8389, "eval_samples_per_second": 16.533, "eval_steps_per_second": 2.067, "eval_weighted avg": { "f1-score": 0.9061618943658084, "precision": 0.9059932736781678, "recall": 0.9105177145323918, "support": 32431.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 0.8612700700759888, "learning_rate": 1.7530864197530865e-05, "loss": 0.2223, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6977381768334475, "precision": 0.629950495049505, "recall": 0.7818740399385561, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8892209178228387, "precision": 0.8621688741721855, "recall": 0.9180255619215514, "support": 2269.0 }, "eval_O": { "f1-score": 0.9988698521766648, "precision": 0.9998190045248869, "recall": 0.9979225002258152, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9034227633516503, "precision": 0.9449996243143738, "recall": 0.8653502132929682, "support": 14534.0 }, "eval_accuracy": 0.9025623631710401, "eval_loss": 0.3512111008167267, "eval_macro avg": { "f1-score": 0.8723129275461503, "precision": 0.8592344995152377, "recall": 0.8907930788447227, "support": 32431.0 }, "eval_runtime": 4.8174, "eval_samples_per_second": 16.607, "eval_steps_per_second": 2.076, "eval_weighted avg": { "f1-score": 0.9061105026169647, "precision": 0.9136494810618672, "recall": 0.9025623631710401, "support": 32431.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.6987032799389779, "precision": 0.6177710335525206, "recall": 0.80403774412991, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9015364639688379, "precision": 0.8856292517006803, "recall": 0.9180255619215514, "support": 2269.0 }, "eval_O": { "f1-score": 0.998915499322187, "precision": 0.9994574554661362, "recall": 0.9983741306115076, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.8976577489773016, "precision": 0.9472075788830315, "recall": 0.8530342644832806, "support": 14534.0 }, "eval_accuracy": 0.9003114304215103, "eval_loss": 0.3705728054046631, "eval_macro avg": { "f1-score": 0.8742032480518261, "precision": 0.8625163299005921, "recall": 0.8933679252865624, "support": 32431.0 }, "eval_runtime": 4.8542, "eval_samples_per_second": 16.48, "eval_steps_per_second": 2.06, "eval_weighted avg": { "f1-score": 0.9045397397569994, "precision": 0.9144455555782818, "recall": 0.9003114304215103, "support": 32431.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6657139454567107, "precision": 0.7278645833333334, "recall": 0.6133421110379635, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8882521489971346, "precision": 0.8884479717813051, "recall": 0.8880564125165271, "support": 2269.0 }, "eval_O": { "f1-score": 0.9987339482727438, "precision": 0.9999094612947035, "recall": 0.9975611959172613, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9178854152690192, "precision": 0.8955360649299646, "recall": 0.9413788358332187, "support": 14534.0 }, "eval_accuracy": 0.9107335573987851, "eval_loss": 0.41826051473617554, "eval_macro avg": { "f1-score": 0.8676463644989021, "precision": 0.8779395203348266, "recall": 0.8600846388262426, "support": 32431.0 }, "eval_runtime": 4.809, "eval_samples_per_second": 16.635, "eval_steps_per_second": 2.079, "eval_weighted avg": { "f1-score": 0.9079779452489347, "precision": 0.9071100542045434, "recall": 0.9107335573987851, "support": 32431.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6738416222952305, "precision": 0.6964871194379391, "recall": 0.652622339258284, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8951019550136641, "precision": 0.8557073954983923, "recall": 0.9382988100484795, "support": 2269.0 }, "eval_O": { "f1-score": 0.9989605459393501, "precision": 0.9996382054992764, "recall": 0.998283804534369, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9109121470961545, "precision": 0.9083259218717931, "recall": 0.9135131415990092, "support": 14534.0 }, "eval_accuracy": 0.9075267490980852, "eval_loss": 0.5459077954292297, "eval_macro avg": { "f1-score": 0.8697040675860997, "precision": 0.8650396605768502, "recall": 0.8756795238600354, "support": 32431.0 }, "eval_runtime": 4.8478, "eval_samples_per_second": 16.502, "eval_steps_per_second": 2.063, "eval_weighted avg": { "f1-score": 0.9065515080853328, "precision": 0.9060496255197702, "recall": 0.9075267490980852, "support": 32431.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6581417175035196, "precision": 0.7070834383665239, "recall": 0.6155365371955234, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.881979417560762, "precision": 0.876414273281114, "recall": 0.8876156897311591, "support": 2269.0 }, "eval_O": { "f1-score": 0.9999548390010388, "precision": 0.9999096820809249, "recall": 1.0, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9173079519373565, "precision": 0.9002915065588976, "recall": 0.9349800467868447, "support": 14534.0 }, "eval_accuracy": 0.9089759797724399, "eval_loss": 0.5726844668388367, "eval_macro avg": { "f1-score": 0.8643459815006692, "precision": 0.8709247250718651, "recall": 0.8595330684283817, "support": 32431.0 }, "eval_runtime": 4.8447, "eval_samples_per_second": 16.513, "eval_steps_per_second": 2.064, "eval_weighted avg": { "f1-score": 0.9066330640790278, "precision": 0.9054793272287636, "recall": 0.9089759797724399, "support": 32431.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.7079179557694512, "precision": 0.7407673860911271, "recall": 0.6778582400702217, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8947480331703169, "precision": 0.8644207066557108, "recall": 0.9272807404142794, "support": 2269.0 }, "eval_O": { "f1-score": 0.999186477447347, "precision": 0.9999095431931253, "recall": 0.998464456688646, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9243840851702722, "precision": 0.9169374492282697, "recall": 0.9319526627218935, "support": 14534.0 }, "eval_accuracy": 0.9186272393697388, "eval_loss": 0.5660926699638367, "eval_macro avg": { "f1-score": 0.8815591378893468, "precision": 0.8805087712920582, "recall": 0.8838890249737601, "support": 32431.0 }, "eval_runtime": 4.8659, "eval_samples_per_second": 16.441, "eval_steps_per_second": 2.055, "eval_weighted avg": { "f1-score": 0.917429533390557, "precision": 0.9168331226787283, "recall": 0.9186272393697388, "support": 32431.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 2.9101791381835938, "learning_rate": 1.506172839506173e-05, "loss": 0.0294, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { "f1-score": 0.6744648496900223, "precision": 0.7221943887775552, "recall": 0.6326530612244898, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9044719714465672, "precision": 0.863672814755413, "recall": 0.9493168796826796, "support": 2269.0 }, "eval_O": { "f1-score": 0.9995934041111363, "precision": 0.9999096167751266, "recall": 0.9992773913828923, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9161992180860105, "precision": 0.9055171023452725, "recall": 0.9271363698912893, "support": 14534.0 }, "eval_accuracy": 0.9119361105115475, "eval_loss": 0.5764271020889282, "eval_macro avg": { "f1-score": 0.8736823608334341, "precision": 0.8728234806633418, "recall": 0.8770959255453377, "support": 32431.0 }, "eval_runtime": 4.8358, "eval_samples_per_second": 16.543, "eval_steps_per_second": 2.068, "eval_weighted avg": { "f1-score": 0.9098800911419986, "precision": 0.909053016531805, "recall": 0.9119361105115475, "support": 32431.0 }, "step": 1053 }, { "epoch": 14.0, "eval_Claim": { "f1-score": 0.7047413793103449, "precision": 0.6923565530383231, "recall": 0.717577353522054, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9112001687407719, "precision": 0.8737864077669902, "recall": 0.9519612163948876, "support": 2269.0 }, "eval_O": { "f1-score": 0.9992316384180792, "precision": 1.0, "recall": 0.998464456688646, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9145424153781864, "precision": 0.9258919757439007, "recall": 0.903467730838035, "support": 14534.0 }, "eval_accuracy": 0.9131694983195091, "eval_loss": 0.5468704700469971, "eval_macro avg": { "f1-score": 0.8824289004618456, "precision": 0.8730087341373035, "recall": 0.8928676893609057, "support": 32431.0 }, "eval_runtime": 4.8411, "eval_samples_per_second": 16.525, "eval_steps_per_second": 2.066, "eval_weighted avg": { "f1-score": 0.9137390947680665, "precision": 0.9147298617643856, "recall": 0.9131694983195091, "support": 32431.0 }, "step": 1134 }, { "epoch": 15.0, "eval_Claim": { "f1-score": 0.6868638486408813, "precision": 0.7559304164470216, "recall": 0.6293614219881501, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9058546000428909, "precision": 0.8822055137844611, "recall": 0.9308065226972234, "support": 2269.0 }, "eval_O": { "f1-score": 0.9999548349216385, "precision": 1.0, "recall": 0.9999096739228616, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9218029420675262, "precision": 0.9023924075660713, "recall": 0.9420668776661622, "support": 14534.0 }, "eval_accuracy": 0.9170855046097869, "eval_loss": 0.6552095413208008, "eval_macro avg": { "f1-score": 0.8786190564182342, "precision": 0.8851320844493885, "recall": 0.8755361240685993, "support": 32431.0 }, "eval_runtime": 4.8081, "eval_samples_per_second": 16.639, "eval_steps_per_second": 2.08, "eval_weighted avg": { "f1-score": 0.9143537536055225, "precision": 0.9137205288178378, "recall": 0.9170855046097869, "support": 32431.0 }, "step": 1215 }, { "epoch": 16.0, "eval_Claim": { "f1-score": 0.7261954261954263, "precision": 0.6899071696622556, "recall": 0.7665130568356375, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9104052573932091, "precision": 0.9050522648083623, "recall": 0.9158219479947113, "support": 2269.0 }, "eval_O": { "f1-score": 0.9999548349216385, "precision": 1.0, "recall": 0.9999096739228616, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9186291000841041, "precision": 0.9360805599200114, "recall": 0.9018164304389706, "support": 14534.0 }, "eval_accuracy": 0.9172705127809812, "eval_loss": 0.604762613773346, "eval_macro avg": { "f1-score": 0.8887961546485946, "precision": 0.8827599985976573, "recall": 0.8960152772980452, "support": 32431.0 }, "eval_runtime": 4.8523, "eval_samples_per_second": 16.487, "eval_steps_per_second": 2.061, "eval_weighted avg": { "f1-score": 0.918776399254959, "precision": 0.92113920073012, "recall": 0.9172705127809812, "support": 32431.0 }, "step": 1296 }, { "epoch": 17.0, "eval_Claim": { "f1-score": 0.6726977499420088, "precision": 0.7134071340713407, "recall": 0.6363835856923414, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.898166556218246, "precision": 0.9003542958370239, "recall": 0.8959894226531512, "support": 2269.0 }, "eval_O": { "f1-score": 0.9999548349216385, "precision": 1.0, "recall": 0.9999096739228616, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9172189909373732, "precision": 0.9018486500864477, "recall": 0.9331223338378973, "support": 14534.0 }, "eval_accuracy": 0.9116277635595572, "eval_loss": 0.6666390299797058, "eval_macro avg": { "f1-score": 0.8720095330048165, "precision": 0.8789025199987031, "recall": 0.8663512540265629, "support": 32431.0 }, "eval_runtime": 4.8204, "eval_samples_per_second": 16.596, "eval_steps_per_second": 2.075, "eval_weighted avg": { "f1-score": 0.9097710324765249, "precision": 0.9087714991080675, "recall": 0.9116277635595572, "support": 32431.0 }, "step": 1377 }, { "epoch": 18.0, "eval_Claim": { "f1-score": 0.7242054854157598, "precision": 0.7184193478730296, "recall": 0.7300855826201448, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.91550204257149, "precision": 0.8937867338371117, "recall": 0.9382988100484795, "support": 2269.0 }, "eval_O": { "f1-score": 0.9999548349216385, "precision": 1.0, "recall": 0.9999096739228616, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9218198185721211, "precision": 0.9277948146083078, "recall": 0.9159212880143113, "support": 14534.0 }, "eval_accuracy": 0.9200456353488946, "eval_loss": 0.5898464918136597, "eval_macro avg": { "f1-score": 0.8903705453702524, "precision": 0.8850002240796123, "recall": 0.8960538386514494, "support": 32431.0 }, "eval_runtime": 4.8354, "eval_samples_per_second": 16.545, "eval_steps_per_second": 2.068, "eval_weighted avg": { "f1-score": 0.9202833015380038, "precision": 0.9206441029524512, "recall": 0.9200456353488946, "support": 32431.0 }, "step": 1458 }, { "epoch": 18.52, "grad_norm": 0.2594422399997711, "learning_rate": 1.2592592592592593e-05, "loss": 0.0093, "step": 1500 }, { "epoch": 19.0, "eval_Claim": { "f1-score": 0.7198604752561586, "precision": 0.7151830192765866, "recall": 0.7245995172262454, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9025769956002515, "precision": 0.860223642172524, "recall": 0.9493168796826796, "support": 2269.0 }, "eval_O": { "f1-score": 0.9975098474215601, "precision": 1.0, "recall": 0.9950320657573841, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9212571111419453, "precision": 0.9289911851126347, "recall": 0.9136507499655979, "support": 14534.0 }, "eval_accuracy": 0.9173630168665783, "eval_loss": 0.6108261346817017, "eval_macro avg": { "f1-score": 0.8853011073549788, "precision": 0.8760994616404363, "recall": 0.8956498031579767, "support": 32431.0 }, "eval_runtime": 4.8341, "eval_samples_per_second": 16.549, "eval_steps_per_second": 2.069, "eval_weighted avg": { "f1-score": 0.9176816552958715, "precision": 0.9183773040411919, "recall": 0.9173630168665783, "support": 32431.0 }, "step": 1539 }, { "epoch": 20.0, "eval_Claim": { "f1-score": 0.7150392937883518, "precision": 0.7018174133558749, "recall": 0.7287689269256089, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8972902097902098, "precision": 0.8899003034243607, "recall": 0.9048038783605112, "support": 2269.0 }, "eval_O": { "f1-score": 0.9992316384180792, "precision": 1.0, "recall": 0.998464456688646, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9203380437794403, "precision": 0.9266285395452644, "recall": 0.9141323792486583, "support": 14534.0 }, "eval_accuracy": 0.9162221331442139, "eval_loss": 0.6273905038833618, "eval_macro avg": { "f1-score": 0.8829747964440202, "precision": 0.879586564081375, "recall": 0.886542410305856, "support": 32431.0 }, "eval_runtime": 4.8321, "eval_samples_per_second": 16.556, "eval_steps_per_second": 2.069, "eval_weighted avg": { "f1-score": 0.9168102169228345, "precision": 0.9175167258142971, "recall": 0.9162221331442139, "support": 32431.0 }, "step": 1620 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 2875774654536000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }