{ "best_metric": null, "best_model_checkpoint": null, "epoch": 13.0, "eval_steps": 500, "global_step": 1053, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.5724454649827784, "precision": 0.6002889477486154, "recall": 0.5470704410796576, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7860447185813415, "precision": 0.698526892771497, "recall": 0.8986337593653592, "support": 2269.0 }, "eval_O": { "f1-score": 0.9987820830889982, "precision": 0.9975671292124707, "recall": 1.0, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.8965445389824622, "precision": 0.9051258677512096, "recall": 0.8881243979633962, "support": 14534.0 }, "eval_accuracy": 0.8791279948197712, "eval_loss": 0.3157936632633209, "eval_macro avg": { "f1-score": 0.8134542014088951, "precision": 0.8003772093709481, "recall": 0.8334571496021033, "support": 32431.0 }, "eval_runtime": 4.8184, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.075, "eval_weighted avg": { "f1-score": 0.8781740995293698, "precision": 0.8793943851252293, "recall": 0.8791279948197712, "support": 32431.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.6090644692195831, "precision": 0.680108254397835, "recall": 0.5514592933947773, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8585858585858586, "precision": 0.8960229995208433, "recall": 0.8241516086381666, "support": 2269.0 }, "eval_O": { "f1-score": 0.999458141425088, "precision": 0.9992776523702032, "recall": 0.9996386956914461, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9105885478942475, "precision": 0.8801849235905997, "recall": 0.9431677445988716, "support": 14534.0 }, "eval_accuracy": 0.8990780426135487, "eval_loss": 0.24451124668121338, "eval_macro avg": { "f1-score": 0.8444242542811943, "precision": 0.8638984574698704, "recall": 0.8296043355808155, "support": 32431.0 }, "eval_runtime": 4.8529, "eval_samples_per_second": 16.485, "eval_steps_per_second": 2.061, "eval_weighted avg": { "f1-score": 0.8949194640367888, "precision": 0.8938342965082798, "recall": 0.8990780426135487, "support": 32431.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.6542320859402667, "precision": 0.7079713847726111, "recall": 0.60807548825982, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8915187376725839, "precision": 0.8866608544027899, "recall": 0.8964301454385192, "support": 2269.0 }, "eval_O": { "f1-score": 0.9995936611133684, "precision": 0.9992778479870013, "recall": 0.9999096739228616, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9157990498332153, "precision": 0.8973258501155497, "recall": 0.935048850970139, "support": 14534.0 }, "eval_accuracy": 0.9085442940396534, "eval_loss": 0.22665414214134216, "eval_macro avg": { "f1-score": 0.8652858836398586, "precision": 0.872808984319488, "recall": 0.8598660396478349, "support": 32431.0 }, "eval_runtime": 4.8494, "eval_samples_per_second": 16.497, "eval_steps_per_second": 2.062, "eval_weighted avg": { "f1-score": 0.9059516032151627, "precision": 0.9047762338408377, "recall": 0.9085442940396534, "support": 32431.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6861642294713161, "precision": 0.6424741478360781, "recall": 0.7362299758613122, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8995515695067264, "precision": 0.9155636695572797, "recall": 0.884089907448215, "support": 2269.0 }, "eval_O": { "f1-score": 0.9976481230212574, "precision": 0.9990941208442794, "recall": 0.9962063047601842, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9062532879739067, "precision": 0.9242435081193219, "recall": 0.8889500481629283, "support": 14534.0 }, "eval_accuracy": 0.9037649162838025, "eval_loss": 0.2452327460050583, "eval_macro avg": { "f1-score": 0.8724043024933017, "precision": 0.8703438615892398, "recall": 0.8763690590581599, "support": 32431.0 }, "eval_runtime": 4.8393, "eval_samples_per_second": 16.531, "eval_steps_per_second": 2.066, "eval_weighted avg": { "f1-score": 0.9060584182508294, "precision": 0.9095955973231696, "recall": 0.9037649162838025, "support": 32431.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6640249759846301, "precision": 0.7332272606735614, "recall": 0.6067588325652842, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8848007008322383, "precision": 0.8794079233783195, "recall": 0.8902600264433671, "support": 2269.0 }, "eval_O": { "f1-score": 0.999367774566474, "precision": 0.9992775219001174, "recall": 0.9994580435371692, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9201314377682404, "precision": 0.8973839110529758, "recall": 0.9440621989816981, "support": 14534.0 }, "eval_accuracy": 0.9118127717307515, "eval_loss": 0.28411003947257996, "eval_macro avg": { "f1-score": 0.8670812222878956, "precision": 0.8773241542512435, "recall": 0.8601347753818797, "support": 32431.0 }, "eval_runtime": 4.8228, "eval_samples_per_second": 16.588, "eval_steps_per_second": 2.073, "eval_weighted avg": { "f1-score": 0.9087220731552942, "precision": 0.9078434958291443, "recall": 0.9118127717307515, "support": 32431.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.6496087442553721, "precision": 0.7484258729250143, "recall": 0.5738424402018872, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8865800865800866, "precision": 0.8711186729051468, "recall": 0.9026002644336713, "support": 2269.0 }, "eval_O": { "f1-score": 0.9986892655367232, "precision": 0.9994572100597069, "recall": 0.9979225002258152, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9191778088205947, "precision": 0.889647180015452, "recall": 0.9507362047612495, "support": 14534.0 }, "eval_accuracy": 0.9105177145323918, "eval_loss": 0.3082066774368286, "eval_macro avg": { "f1-score": 0.8635139762981943, "precision": 0.87716223397633, "recall": 0.8562753524056559, "support": 32431.0 }, "eval_runtime": 4.8389, "eval_samples_per_second": 16.533, "eval_steps_per_second": 2.067, "eval_weighted avg": { "f1-score": 0.9061618943658084, "precision": 0.9059932736781678, "recall": 0.9105177145323918, "support": 32431.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 0.8612700700759888, "learning_rate": 1.7530864197530865e-05, "loss": 0.2223, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6977381768334475, "precision": 0.629950495049505, "recall": 0.7818740399385561, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8892209178228387, "precision": 0.8621688741721855, "recall": 0.9180255619215514, "support": 2269.0 }, "eval_O": { "f1-score": 0.9988698521766648, "precision": 0.9998190045248869, "recall": 0.9979225002258152, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9034227633516503, "precision": 0.9449996243143738, "recall": 0.8653502132929682, "support": 14534.0 }, "eval_accuracy": 0.9025623631710401, "eval_loss": 0.3512111008167267, "eval_macro avg": { "f1-score": 0.8723129275461503, "precision": 0.8592344995152377, "recall": 0.8907930788447227, "support": 32431.0 }, "eval_runtime": 4.8174, "eval_samples_per_second": 16.607, "eval_steps_per_second": 2.076, "eval_weighted avg": { "f1-score": 0.9061105026169647, "precision": 0.9136494810618672, "recall": 0.9025623631710401, "support": 32431.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.6987032799389779, "precision": 0.6177710335525206, "recall": 0.80403774412991, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9015364639688379, "precision": 0.8856292517006803, "recall": 0.9180255619215514, "support": 2269.0 }, "eval_O": { "f1-score": 0.998915499322187, "precision": 0.9994574554661362, "recall": 0.9983741306115076, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.8976577489773016, "precision": 0.9472075788830315, "recall": 0.8530342644832806, "support": 14534.0 }, "eval_accuracy": 0.9003114304215103, "eval_loss": 0.3705728054046631, "eval_macro avg": { "f1-score": 0.8742032480518261, "precision": 0.8625163299005921, "recall": 0.8933679252865624, "support": 32431.0 }, "eval_runtime": 4.8542, "eval_samples_per_second": 16.48, "eval_steps_per_second": 2.06, "eval_weighted avg": { "f1-score": 0.9045397397569994, "precision": 0.9144455555782818, "recall": 0.9003114304215103, "support": 32431.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6657139454567107, "precision": 0.7278645833333334, "recall": 0.6133421110379635, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8882521489971346, "precision": 0.8884479717813051, "recall": 0.8880564125165271, "support": 2269.0 }, "eval_O": { "f1-score": 0.9987339482727438, "precision": 0.9999094612947035, "recall": 0.9975611959172613, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9178854152690192, "precision": 0.8955360649299646, "recall": 0.9413788358332187, "support": 14534.0 }, "eval_accuracy": 0.9107335573987851, "eval_loss": 0.41826051473617554, "eval_macro avg": { "f1-score": 0.8676463644989021, "precision": 0.8779395203348266, "recall": 0.8600846388262426, "support": 32431.0 }, "eval_runtime": 4.809, "eval_samples_per_second": 16.635, "eval_steps_per_second": 2.079, "eval_weighted avg": { "f1-score": 0.9079779452489347, "precision": 0.9071100542045434, "recall": 0.9107335573987851, "support": 32431.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6738416222952305, "precision": 0.6964871194379391, "recall": 0.652622339258284, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8951019550136641, "precision": 0.8557073954983923, "recall": 0.9382988100484795, "support": 2269.0 }, "eval_O": { "f1-score": 0.9989605459393501, "precision": 0.9996382054992764, "recall": 0.998283804534369, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9109121470961545, "precision": 0.9083259218717931, "recall": 0.9135131415990092, "support": 14534.0 }, "eval_accuracy": 0.9075267490980852, "eval_loss": 0.5459077954292297, "eval_macro avg": { "f1-score": 0.8697040675860997, "precision": 0.8650396605768502, "recall": 0.8756795238600354, "support": 32431.0 }, "eval_runtime": 4.8478, "eval_samples_per_second": 16.502, "eval_steps_per_second": 2.063, "eval_weighted avg": { "f1-score": 0.9065515080853328, "precision": 0.9060496255197702, "recall": 0.9075267490980852, "support": 32431.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6581417175035196, "precision": 0.7070834383665239, "recall": 0.6155365371955234, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.881979417560762, "precision": 0.876414273281114, "recall": 0.8876156897311591, "support": 2269.0 }, "eval_O": { "f1-score": 0.9999548390010388, "precision": 0.9999096820809249, "recall": 1.0, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9173079519373565, "precision": 0.9002915065588976, "recall": 0.9349800467868447, "support": 14534.0 }, "eval_accuracy": 0.9089759797724399, "eval_loss": 0.5726844668388367, "eval_macro avg": { "f1-score": 0.8643459815006692, "precision": 0.8709247250718651, "recall": 0.8595330684283817, "support": 32431.0 }, "eval_runtime": 4.8447, "eval_samples_per_second": 16.513, "eval_steps_per_second": 2.064, "eval_weighted avg": { "f1-score": 0.9066330640790278, "precision": 0.9054793272287636, "recall": 0.9089759797724399, "support": 32431.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.7079179557694512, "precision": 0.7407673860911271, "recall": 0.6778582400702217, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8947480331703169, "precision": 0.8644207066557108, "recall": 0.9272807404142794, "support": 2269.0 }, "eval_O": { "f1-score": 0.999186477447347, "precision": 0.9999095431931253, "recall": 0.998464456688646, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9243840851702722, "precision": 0.9169374492282697, "recall": 0.9319526627218935, "support": 14534.0 }, "eval_accuracy": 0.9186272393697388, "eval_loss": 0.5660926699638367, "eval_macro avg": { "f1-score": 0.8815591378893468, "precision": 0.8805087712920582, "recall": 0.8838890249737601, "support": 32431.0 }, "eval_runtime": 4.8659, "eval_samples_per_second": 16.441, "eval_steps_per_second": 2.055, "eval_weighted avg": { "f1-score": 0.917429533390557, "precision": 0.9168331226787283, "recall": 0.9186272393697388, "support": 32431.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 2.9101791381835938, "learning_rate": 1.506172839506173e-05, "loss": 0.0294, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { "f1-score": 0.6744648496900223, "precision": 0.7221943887775552, "recall": 0.6326530612244898, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9044719714465672, "precision": 0.863672814755413, "recall": 0.9493168796826796, "support": 2269.0 }, "eval_O": { "f1-score": 0.9995934041111363, "precision": 0.9999096167751266, "recall": 0.9992773913828923, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9161992180860105, "precision": 0.9055171023452725, "recall": 0.9271363698912893, "support": 14534.0 }, "eval_accuracy": 0.9119361105115475, "eval_loss": 0.5764271020889282, "eval_macro avg": { "f1-score": 0.8736823608334341, "precision": 0.8728234806633418, "recall": 0.8770959255453377, "support": 32431.0 }, "eval_runtime": 4.8358, "eval_samples_per_second": 16.543, "eval_steps_per_second": 2.068, "eval_weighted avg": { "f1-score": 0.9098800911419986, "precision": 0.909053016531805, "recall": 0.9119361105115475, "support": 32431.0 }, "step": 1053 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1869253525448400.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }