{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 648, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.5724454649827784, "precision": 0.6002889477486154, "recall": 0.5470704410796576, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.7860447185813415, "precision": 0.698526892771497, "recall": 0.8986337593653592, "support": 2269.0 }, "eval_O": { "f1-score": 0.9987820830889982, "precision": 0.9975671292124707, "recall": 1.0, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.8965445389824622, "precision": 0.9051258677512096, "recall": 0.8881243979633962, "support": 14534.0 }, "eval_accuracy": 0.8791279948197712, "eval_loss": 0.3157936632633209, "eval_macro avg": { "f1-score": 0.8134542014088951, "precision": 0.8003772093709481, "recall": 0.8334571496021033, "support": 32431.0 }, "eval_runtime": 4.8184, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.075, "eval_weighted avg": { "f1-score": 0.8781740995293698, "precision": 0.8793943851252293, "recall": 0.8791279948197712, "support": 32431.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.6090644692195831, "precision": 0.680108254397835, "recall": 0.5514592933947773, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8585858585858586, "precision": 0.8960229995208433, "recall": 0.8241516086381666, "support": 2269.0 }, "eval_O": { "f1-score": 0.999458141425088, "precision": 0.9992776523702032, "recall": 0.9996386956914461, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9105885478942475, "precision": 0.8801849235905997, "recall": 0.9431677445988716, "support": 14534.0 }, "eval_accuracy": 0.8990780426135487, "eval_loss": 0.24451124668121338, "eval_macro avg": { "f1-score": 0.8444242542811943, "precision": 0.8638984574698704, "recall": 0.8296043355808155, "support": 32431.0 }, "eval_runtime": 4.8529, "eval_samples_per_second": 16.485, "eval_steps_per_second": 2.061, "eval_weighted avg": { "f1-score": 0.8949194640367888, "precision": 0.8938342965082798, "recall": 0.8990780426135487, "support": 32431.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.6542320859402667, "precision": 0.7079713847726111, "recall": 0.60807548825982, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8915187376725839, "precision": 0.8866608544027899, "recall": 0.8964301454385192, "support": 2269.0 }, "eval_O": { "f1-score": 0.9995936611133684, "precision": 0.9992778479870013, "recall": 0.9999096739228616, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9157990498332153, "precision": 0.8973258501155497, "recall": 0.935048850970139, "support": 14534.0 }, "eval_accuracy": 0.9085442940396534, "eval_loss": 0.22665414214134216, "eval_macro avg": { "f1-score": 0.8652858836398586, "precision": 0.872808984319488, "recall": 0.8598660396478349, "support": 32431.0 }, "eval_runtime": 4.8494, "eval_samples_per_second": 16.497, "eval_steps_per_second": 2.062, "eval_weighted avg": { "f1-score": 0.9059516032151627, "precision": 0.9047762338408377, "recall": 0.9085442940396534, "support": 32431.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6861642294713161, "precision": 0.6424741478360781, "recall": 0.7362299758613122, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8995515695067264, "precision": 0.9155636695572797, "recall": 0.884089907448215, "support": 2269.0 }, "eval_O": { "f1-score": 0.9976481230212574, "precision": 0.9990941208442794, "recall": 0.9962063047601842, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9062532879739067, "precision": 0.9242435081193219, "recall": 0.8889500481629283, "support": 14534.0 }, "eval_accuracy": 0.9037649162838025, "eval_loss": 0.2452327460050583, "eval_macro avg": { "f1-score": 0.8724043024933017, "precision": 0.8703438615892398, "recall": 0.8763690590581599, "support": 32431.0 }, "eval_runtime": 4.8393, "eval_samples_per_second": 16.531, "eval_steps_per_second": 2.066, "eval_weighted avg": { "f1-score": 0.9060584182508294, "precision": 0.9095955973231696, "recall": 0.9037649162838025, "support": 32431.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6640249759846301, "precision": 0.7332272606735614, "recall": 0.6067588325652842, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8848007008322383, "precision": 0.8794079233783195, "recall": 0.8902600264433671, "support": 2269.0 }, "eval_O": { "f1-score": 0.999367774566474, "precision": 0.9992775219001174, "recall": 0.9994580435371692, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9201314377682404, "precision": 0.8973839110529758, "recall": 0.9440621989816981, "support": 14534.0 }, "eval_accuracy": 0.9118127717307515, "eval_loss": 0.28411003947257996, "eval_macro avg": { "f1-score": 0.8670812222878956, "precision": 0.8773241542512435, "recall": 0.8601347753818797, "support": 32431.0 }, "eval_runtime": 4.8228, "eval_samples_per_second": 16.588, "eval_steps_per_second": 2.073, "eval_weighted avg": { "f1-score": 0.9087220731552942, "precision": 0.9078434958291443, "recall": 0.9118127717307515, "support": 32431.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.6496087442553721, "precision": 0.7484258729250143, "recall": 0.5738424402018872, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8865800865800866, "precision": 0.8711186729051468, "recall": 0.9026002644336713, "support": 2269.0 }, "eval_O": { "f1-score": 0.9986892655367232, "precision": 0.9994572100597069, "recall": 0.9979225002258152, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9191778088205947, "precision": 0.889647180015452, "recall": 0.9507362047612495, "support": 14534.0 }, "eval_accuracy": 0.9105177145323918, "eval_loss": 0.3082066774368286, "eval_macro avg": { "f1-score": 0.8635139762981943, "precision": 0.87716223397633, "recall": 0.8562753524056559, "support": 32431.0 }, "eval_runtime": 4.8389, "eval_samples_per_second": 16.533, "eval_steps_per_second": 2.067, "eval_weighted avg": { "f1-score": 0.9061618943658084, "precision": 0.9059932736781678, "recall": 0.9105177145323918, "support": 32431.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 0.8612700700759888, "learning_rate": 1.7530864197530865e-05, "loss": 0.2223, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6977381768334475, "precision": 0.629950495049505, "recall": 0.7818740399385561, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.8892209178228387, "precision": 0.8621688741721855, "recall": 0.9180255619215514, "support": 2269.0 }, "eval_O": { "f1-score": 0.9988698521766648, "precision": 0.9998190045248869, "recall": 0.9979225002258152, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.9034227633516503, "precision": 0.9449996243143738, "recall": 0.8653502132929682, "support": 14534.0 }, "eval_accuracy": 0.9025623631710401, "eval_loss": 0.3512111008167267, "eval_macro avg": { "f1-score": 0.8723129275461503, "precision": 0.8592344995152377, "recall": 0.8907930788447227, "support": 32431.0 }, "eval_runtime": 4.8174, "eval_samples_per_second": 16.607, "eval_steps_per_second": 2.076, "eval_weighted avg": { "f1-score": 0.9061105026169647, "precision": 0.9136494810618672, "recall": 0.9025623631710401, "support": 32431.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.6987032799389779, "precision": 0.6177710335525206, "recall": 0.80403774412991, "support": 4557.0 }, "eval_MajorClaim": { "f1-score": 0.9015364639688379, "precision": 0.8856292517006803, "recall": 0.9180255619215514, "support": 2269.0 }, "eval_O": { "f1-score": 0.998915499322187, "precision": 0.9994574554661362, "recall": 0.9983741306115076, "support": 11071.0 }, "eval_Premise": { "f1-score": 0.8976577489773016, "precision": 0.9472075788830315, "recall": 0.8530342644832806, "support": 14534.0 }, "eval_accuracy": 0.9003114304215103, "eval_loss": 0.3705728054046631, "eval_macro avg": { "f1-score": 0.8742032480518261, "precision": 0.8625163299005921, "recall": 0.8933679252865624, "support": 32431.0 }, "eval_runtime": 4.8542, "eval_samples_per_second": 16.48, "eval_steps_per_second": 2.06, "eval_weighted avg": { "f1-score": 0.9045397397569994, "precision": 0.9144455555782818, "recall": 0.9003114304215103, "support": 32431.0 }, "step": 648 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1150309861814400.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }