{ "best_metric": null, "best_model_checkpoint": null, "epoch": 35.0, "eval_steps": 500, "global_step": 2835, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.5254768748366867, "precision": 0.5768789443488239, "recall": 0.4824856046065259, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.7984361424847959, "precision": 0.7495921696574225, "recall": 0.854089219330855, "support": 2152.0 }, "eval_O": { "f1-score": 0.9990277532260916, "precision": 0.9988511841640155, "recall": 0.9992043847241867, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8854183656825966, "precision": 0.8719781543651113, "recall": 0.8992793837488611, "support": 12073.0 }, "eval_accuracy": 0.8755765022723447, "eval_loss": 0.28303390741348267, "eval_macro avg": { "f1-score": 0.8020897840575427, "precision": 0.7993251131338432, "recall": 0.8087646481026072, "support": 29705.0 }, "eval_runtime": 4.8106, "eval_samples_per_second": 16.63, "eval_steps_per_second": 2.079, "eval_weighted avg": { "f1-score": 0.8718761173649596, "precision": 0.8700202202343714, "recall": 0.8755765022723447, "support": 29705.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.5578782336033447, "precision": 0.6124497991967871, "recall": 0.5122360844529751, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.784421283598464, "precision": 0.9571619812583668, "recall": 0.6644981412639405, "support": 2152.0 }, "eval_O": { "f1-score": 0.9998674150351351, "precision": 0.9997348652231551, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8977749872463995, "precision": 0.8530201342281879, "recall": 0.9474861260664292, "support": 12073.0 }, "eval_accuracy": 0.8859114627167144, "eval_loss": 0.27363118529319763, "eval_macro avg": { "f1-score": 0.8099854798708358, "precision": 0.8555916949766242, "recall": 0.7810550879458362, "support": 29705.0 }, "eval_runtime": 4.7727, "eval_samples_per_second": 16.762, "eval_steps_per_second": 2.095, "eval_weighted avg": { "f1-score": 0.8807489883812781, "precision": 0.8826802296805741, "recall": 0.8859114627167144, "support": 29705.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.64760736196319, "precision": 0.6627322953289804, "recall": 0.6331573896353166, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9016018306636155, "precision": 0.8881875563570785, "recall": 0.9154275092936803, "support": 2152.0 }, "eval_O": { "f1-score": 0.9996904982977407, "precision": 1.0, "recall": 0.9993811881188119, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8982820417748115, "precision": 0.8936065573770492, "recall": 0.9030067091857865, "support": 12073.0 }, "eval_accuracy": 0.9027436458508669, "eval_loss": 0.22585685551166534, "eval_macro avg": { "f1-score": 0.8617954331748394, "precision": 0.861131602265777, "recall": 0.8627431990583988, "support": 29705.0 }, "eval_runtime": 4.8003, "eval_samples_per_second": 16.666, "eval_steps_per_second": 2.083, "eval_weighted avg": { "f1-score": 0.9019670975035187, "precision": 0.9013351218793044, "recall": 0.9027436458508669, "support": 29705.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6477602966229081, "precision": 0.5561865427637239, "recall": 0.7754318618042226, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8984409799554566, "precision": 0.8627031650983746, "recall": 0.9372676579925651, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.861495720750997, "precision": 0.9384029675907849, "recall": 0.7962395427814131, "support": 12073.0 }, "eval_accuracy": 0.8811311227066151, "eval_loss": 0.32341843843460083, "eval_macro avg": { "f1-score": 0.8519242493323405, "precision": 0.8393231688632209, "recall": 0.8772347656445502, "support": 29705.0 }, "eval_runtime": 4.7708, "eval_samples_per_second": 16.769, "eval_steps_per_second": 2.096, "eval_weighted avg": { "f1-score": 0.8869263673393438, "precision": 0.9027457246003854, "recall": 0.8811311227066151, "support": 29705.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6188788236838649, "precision": 0.6833864888373442, "recall": 0.5654990403071017, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.909733679000707, "precision": 0.9230033476805356, "recall": 0.8968401486988847, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8978936810431294, "precision": 0.8706816059757236, "recall": 0.9268615919821088, "support": 12073.0 }, "eval_accuracy": 0.9018347079616226, "eval_loss": 0.3072384297847748, "eval_macro avg": { "f1-score": 0.8566154962081685, "precision": 0.8692457621526151, "recall": 0.8473001952470238, "support": 29705.0 }, "eval_runtime": 4.7922, "eval_samples_per_second": 16.694, "eval_steps_per_second": 2.087, "eval_weighted avg": { "f1-score": 0.8984684143294738, "precision": 0.8974043833368577, "recall": 0.9018347079616226, "support": 29705.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.620837537840565, "precision": 0.6545212765957447, "recall": 0.5904510556621881, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8993613741466637, "precision": 0.8547509418166597, "recall": 0.9488847583643123, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8931567692054615, "precision": 0.8869558114841134, "recall": 0.8994450426571688, "support": 12073.0 }, "eval_accuracy": 0.8979633058407676, "eval_loss": 0.3634909987449646, "eval_macro avg": { "f1-score": 0.8533278705744156, "precision": 0.8490349090033436, "recall": 0.8596952141709173, "support": 29705.0 }, "eval_runtime": 4.7765, "eval_samples_per_second": 16.749, "eval_steps_per_second": 2.094, "eval_weighted avg": { "f1-score": 0.8960665959131486, "precision": 0.8950239457358053, "recall": 0.8979633058407676, "support": 29705.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 9.48759651184082, "learning_rate": 1.7530864197530865e-05, "loss": 0.214, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6812872467222885, "precision": 0.6769303647560397, "recall": 0.685700575815739, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9276377217553688, "precision": 0.9319887429643527, "recall": 0.9233271375464684, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999115983026874, "precision": 0.9999115983026874, "recall": 0.9999115983026874, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9017086927670869, "precision": 0.9029819752471135, "recall": 0.9004389961070156, "support": 12073.0 }, "eval_accuracy": 0.9098468271334792, "eval_loss": 0.3468942642211914, "eval_macro avg": { "f1-score": 0.8776363148868579, "precision": 0.8779531703175483, "recall": 0.8773445769429775, "support": 29705.0 }, "eval_runtime": 4.7913, "eval_samples_per_second": 16.697, "eval_steps_per_second": 2.087, "eval_weighted avg": { "f1-score": 0.9100559053806798, "precision": 0.910277290769933, "recall": 0.9098468271334792, "support": 29705.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.6928905033731187, "precision": 0.6105725260654838, "recall": 0.8008637236084453, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9302433371958285, "precision": 0.9278779472954231, "recall": 0.9326208178438662, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999115904871364, "precision": 1.0, "recall": 0.9998231966053748, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8820387074174622, "precision": 0.9356247097073851, "recall": 0.8342582622380519, "support": 12073.0 }, "eval_accuracy": 0.8997475172529877, "eval_loss": 0.40174800157546997, "eval_macro avg": { "f1-score": 0.8762710346183864, "precision": 0.868518795767073, "recall": 0.8918915000739345, "support": 29705.0 }, "eval_runtime": 4.7722, "eval_samples_per_second": 16.764, "eval_steps_per_second": 2.095, "eval_weighted avg": { "f1-score": 0.9038782866839282, "precision": 0.9139692560686062, "recall": 0.8997475172529877, "support": 29705.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6602241979126401, "precision": 0.7130531589201224, "recall": 0.6146833013435701, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9052154195011338, "precision": 0.8839681133746679, "recall": 0.9275092936802974, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9072885349800925, "precision": 0.8903596204449405, "recall": 0.9248736850824153, "support": 12073.0 }, "eval_accuracy": 0.910149806429894, "eval_loss": 0.4713464379310608, "eval_macro avg": { "f1-score": 0.8681709883747097, "precision": 0.8718231247141469, "recall": 0.8667665700265708, "support": 29705.0 }, "eval_runtime": 4.7731, "eval_samples_per_second": 16.761, "eval_steps_per_second": 2.095, "eval_weighted avg": { "f1-score": 0.9077607320175198, "precision": 0.9067368029754924, "recall": 0.910149806429894, "support": 29705.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6952499153785399, "precision": 0.6562300319488817, "recall": 0.7392034548944337, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9103908484270734, "precision": 0.9344422700587084, "recall": 0.887546468401487, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9012517385257303, "precision": 0.9174532349407929, "recall": 0.8856125238134681, "support": 12073.0 }, "eval_accuracy": 0.9087695674128935, "eval_loss": 0.4968256950378418, "eval_macro avg": { "f1-score": 0.8767231255828358, "precision": 0.8770313842370958, "recall": 0.8780906117773473, "support": 29705.0 }, "eval_runtime": 4.7941, "eval_samples_per_second": 16.687, "eval_steps_per_second": 2.086, "eval_weighted avg": { "f1-score": 0.9106135328171674, "precision": 0.9134657277821738, "recall": 0.9087695674128935, "support": 29705.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6796779702366431, "precision": 0.6913151364764268, "recall": 0.668426103646833, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9301903898458749, "precision": 0.9079646017699115, "recall": 0.9535315985130112, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.901923474663909, "precision": 0.9008428358948934, "recall": 0.9030067091857865, "support": 12073.0 }, "eval_accuracy": 0.9106884362901868, "eval_loss": 0.5864725708961487, "eval_macro avg": { "f1-score": 0.8779369089628499, "precision": 0.8750085450645222, "recall": 0.8812411028364077, "support": 29705.0 }, "eval_runtime": 4.8031, "eval_samples_per_second": 16.656, "eval_steps_per_second": 2.082, "eval_weighted avg": { "f1-score": 0.9101191594213591, "precision": 0.9096858090555641, "recall": 0.9106884362901868, "support": 29705.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.697112745636184, "precision": 0.6425824731835661, "recall": 0.7617562380038387, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9365296803652968, "precision": 0.920556552962298, "recall": 0.9530669144981413, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8923037300940034, "precision": 0.9260513186029936, "recall": 0.8609293464756067, "support": 12073.0 }, "eval_accuracy": 0.9066487123379903, "eval_loss": 0.5556066632270813, "eval_macro avg": { "f1-score": 0.8814865390238711, "precision": 0.8722975861872144, "recall": 0.8939381247443967, "support": 29705.0 }, "eval_runtime": 4.7999, "eval_samples_per_second": 16.667, "eval_steps_per_second": 2.083, "eval_weighted avg": { "f1-score": 0.9091318205481447, "precision": 0.9140393543072853, "recall": 0.9066487123379903, "support": 29705.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 3.987734794616699, "learning_rate": 1.506172839506173e-05, "loss": 0.0252, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { "f1-score": 0.6895428929242329, "precision": 0.7212470526591565, "recall": 0.6605086372360844, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9265742214139577, "precision": 0.9069870939029817, "recall": 0.9470260223048327, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9083234293676489, "precision": 0.8989292667099286, "recall": 0.917916010933488, "support": 12073.0 }, "eval_accuracy": 0.9151657970038714, "eval_loss": 0.5547633767127991, "eval_macro avg": { "f1-score": 0.8810990862027029, "precision": 0.8817687548472308, "recall": 0.8813626676186013, "support": 29705.0 }, "eval_runtime": 4.8002, "eval_samples_per_second": 16.666, "eval_steps_per_second": 2.083, "eval_weighted avg": { "f1-score": 0.9138424940934561, "precision": 0.9130371003853032, "recall": 0.9151657970038714, "support": 29705.0 }, "step": 1053 }, { "epoch": 14.0, "eval_Claim": { "f1-score": 0.6662641873943491, "precision": 0.6706368497812348, "recall": 0.6619481765834933, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9252051582649472, "precision": 0.9337434926644581, "recall": 0.91682156133829, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8968191757085687, "precision": 0.8933914187078744, "recall": 0.9002733371987078, "support": 12073.0 }, "eval_accuracy": 0.9060090893788925, "eval_loss": 0.598924994468689, "eval_macro avg": { "f1-score": 0.8720721303419663, "precision": 0.8744429402883919, "recall": 0.8697607687801228, "support": 29705.0 }, "eval_runtime": 4.8166, "eval_samples_per_second": 16.609, "eval_steps_per_second": 2.076, "eval_weighted avg": { "f1-score": 0.9058181633386757, "precision": 0.9056571278963901, "recall": 0.9060090893788925, "support": 29705.0 }, "step": 1134 }, { "epoch": 15.0, "eval_Claim": { "f1-score": 0.6859867017940033, "precision": 0.7189061267420458, "recall": 0.6559500959692899, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9230404925408477, "precision": 0.9410912602607436, "recall": 0.9056691449814126, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9089134677944047, "precision": 0.89272306094736, "recall": 0.9257019796239543, "support": 12073.0 }, "eval_accuracy": 0.9146944958761152, "eval_loss": 0.5877691507339478, "eval_macro avg": { "f1-score": 0.8794851655323139, "precision": 0.8881801119875374, "recall": 0.8718303051436642, "support": 29705.0 }, "eval_runtime": 4.7894, "eval_samples_per_second": 16.704, "eval_steps_per_second": 2.088, "eval_weighted avg": { "f1-score": 0.9133441511431463, "precision": 0.9126906124611831, "recall": 0.9146944958761152, "support": 29705.0 }, "step": 1215 }, { "epoch": 16.0, "eval_Claim": { "f1-score": 0.6963696369636965, "precision": 0.6844300278035218, "recall": 0.7087332053742802, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9235896215186861, "precision": 0.9468033186920449, "recall": 0.9014869888475836, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9048587195552051, "precision": 0.9065513801130695, "recall": 0.9031723680940943, "support": 12073.0 }, "eval_accuracy": 0.9126409695337485, "eval_loss": 0.5437762141227722, "eval_macro avg": { "f1-score": 0.881204494509397, "precision": 0.884446181652159, "recall": 0.8783481405789896, "support": 29705.0 }, "eval_runtime": 4.8031, "eval_samples_per_second": 16.656, "eval_steps_per_second": 2.082, "eval_weighted avg": { "f1-score": 0.913192823886985, "precision": 0.9138872213369954, "recall": 0.9126409695337485, "support": 29705.0 }, "step": 1296 }, { "epoch": 17.0, "eval_Claim": { "f1-score": 0.647066981001248, "precision": 0.7666776207689779, "recall": 0.5597408829174664, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9200805910006717, "precision": 0.8876889848812095, "recall": 0.9549256505576208, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9095471382482972, "precision": 0.8760165720423507, "recall": 0.9457467075291974, "support": 12073.0 }, "eval_accuracy": 0.912910284463895, "eval_loss": 0.6229106783866882, "eval_macro avg": { "f1-score": 0.8691626278387974, "precision": 0.8825736959523487, "recall": 0.8651033102510712, "support": 29705.0 }, "eval_runtime": 4.8073, "eval_samples_per_second": 16.641, "eval_steps_per_second": 2.08, "eval_weighted avg": { "f1-score": 0.9079094842894391, "precision": 0.9087011338660376, "recall": 0.912910284463895, "support": 29705.0 }, "step": 1377 }, { "epoch": 18.0, "eval_Claim": { "f1-score": 0.6821297989031078, "precision": 0.6511780104712042, "recall": 0.7161708253358925, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.922899353647276, "precision": 0.9169724770642201, "recall": 0.9289033457249071, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8941394877853255, "precision": 0.9112487100103199, "recall": 0.8776608962146939, "support": 12073.0 }, "eval_accuracy": 0.9053021376872581, "eval_loss": 0.5977877378463745, "eval_macro avg": { "f1-score": 0.8747811103601705, "precision": 0.8698277009156502, "recall": 0.8806837668188734, "support": 29705.0 }, "eval_runtime": 4.7819, "eval_samples_per_second": 16.73, "eval_steps_per_second": 2.091, "eval_weighted avg": { "f1-score": 0.9067713337488226, "precision": 0.9089358856298488, "recall": 0.9053021376872581, "support": 29705.0 }, "step": 1458 }, { "epoch": 18.52, "grad_norm": 3.641792058944702, "learning_rate": 1.2592592592592593e-05, "loss": 0.0101, "step": 1500 }, { "epoch": 19.0, "eval_Claim": { "f1-score": 0.6483875179527353, "precision": 0.711257519335434, "recall": 0.5957293666026872, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9248662479646429, "precision": 0.9259431765253843, "recall": 0.9237918215613383, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9020460770098276, "precision": 0.8779302234417875, "recall": 0.92752422761534, "support": 12073.0 }, "eval_accuracy": 0.9082982662851372, "eval_loss": 0.6089487075805664, "eval_macro avg": { "f1-score": 0.8688249607318015, "precision": 0.8787827298256514, "recall": 0.8617613539448413, "support": 29705.0 }, "eval_runtime": 4.7853, "eval_samples_per_second": 16.718, "eval_steps_per_second": 2.09, "eval_weighted avg": { "f1-score": 0.9054096491562553, "precision": 0.9045077476547858, "recall": 0.9082982662851372, "support": 29705.0 }, "step": 1539 }, { "epoch": 20.0, "eval_Claim": { "f1-score": 0.7207543032562219, "precision": 0.66363819907127, "recall": 0.7886276391554703, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9401589527816735, "precision": 0.9459078080903104, "recall": 0.9344795539033457, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8998161371702227, "precision": 0.9299982322785929, "recall": 0.8715315166073055, "support": 12073.0 }, "eval_accuracy": 0.9133815855916513, "eval_loss": 0.6035876274108887, "eval_macro avg": { "f1-score": 0.8901823483020296, "precision": 0.8848860598600434, "recall": 0.8986596774165303, "support": 29705.0 }, "eval_runtime": 4.7726, "eval_samples_per_second": 16.762, "eval_steps_per_second": 2.095, "eval_weighted avg": { "f1-score": 0.9157652323317351, "precision": 0.9204344815700675, "recall": 0.9133815855916513, "support": 29705.0 }, "step": 1620 }, { "epoch": 21.0, "eval_Claim": { "f1-score": 0.6858804196688157, "precision": 0.7248196633716271, "recall": 0.650911708253359, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9371935559187485, "precision": 0.9418113561708118, "recall": 0.9326208178438662, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.907693558880937, "precision": 0.8915248821790878, "recall": 0.9244595378116458, "support": 12073.0 }, "eval_accuracy": 0.9154351119340178, "eval_loss": 0.6373494267463684, "eval_macro avg": { "f1-score": 0.8826918836171254, "precision": 0.8895389754303817, "recall": 0.8769980159772177, "support": 29705.0 }, "eval_runtime": 4.7918, "eval_samples_per_second": 16.695, "eval_steps_per_second": 2.087, "eval_weighted avg": { "f1-score": 0.913858759733591, "precision": 0.9130855511853444, "recall": 0.9154351119340178, "support": 29705.0 }, "step": 1701 }, { "epoch": 22.0, "eval_Claim": { "f1-score": 0.6833652924256951, "precision": 0.6827107279693486, "recall": 0.684021113243762, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9286995515695068, "precision": 0.8973136915077989, "recall": 0.962360594795539, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9007964638672282, "precision": 0.9070372858582466, "recall": 0.8946409343162428, "support": 12073.0 }, "eval_accuracy": 0.9101161420636257, "eval_loss": 0.6425191760063171, "eval_macro avg": { "f1-score": 0.8782042772418507, "precision": 0.8717433278630627, "recall": 0.8852556605888859, "support": 29705.0 }, "eval_runtime": 4.8051, "eval_samples_per_second": 16.649, "eval_steps_per_second": 2.081, "eval_weighted avg": { "f1-score": 0.9100704832242509, "precision": 0.910224494827858, "recall": 0.9101161420636257, "support": 29705.0 }, "step": 1782 }, { "epoch": 23.0, "eval_Claim": { "f1-score": 0.7114282372207277, "precision": 0.6941337594156586, "recall": 0.7296065259117083, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9322964318389753, "precision": 0.918018018018018, "recall": 0.9470260223048327, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9069348418185628, "precision": 0.9177408412483039, "recall": 0.8963803528534747, "support": 12073.0 }, "eval_accuracy": 0.9161083992593839, "eval_loss": 0.6442272663116455, "eval_macro avg": { "f1-score": 0.8876648777195664, "precision": 0.8824731546704951, "recall": 0.893253225267504, "support": 29705.0 }, "eval_runtime": 4.7909, "eval_samples_per_second": 16.698, "eval_steps_per_second": 2.087, "eval_weighted avg": { "f1-score": 0.9167803117094421, "precision": 0.917711141572463, "recall": 0.9161083992593839, "support": 29705.0 }, "step": 1863 }, { "epoch": 24.0, "eval_Claim": { "f1-score": 0.70479277610558, "precision": 0.6809843400447427, "recall": 0.7303262955854126, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9265043638033992, "precision": 0.915985467756585, "recall": 0.9372676579925651, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.904177523745482, "precision": 0.917754457810767, "recall": 0.8909964383334714, "support": 12073.0 }, "eval_accuracy": 0.9133142568591146, "eval_loss": 0.6410267353057861, "eval_macro avg": { "f1-score": 0.8838686659136152, "precision": 0.8786810664030237, "recall": 0.8896475979778622, "support": 29705.0 }, "eval_runtime": 4.8171, "eval_samples_per_second": 16.607, "eval_steps_per_second": 2.076, "eval_weighted avg": { "f1-score": 0.9143090027231838, "precision": 0.9157243906772613, "recall": 0.9133142568591146, "support": 29705.0 }, "step": 1944 }, { "epoch": 24.69, "grad_norm": 0.002705144230276346, "learning_rate": 1.0123456790123458e-05, "loss": 0.0041, "step": 2000 }, { "epoch": 25.0, "eval_Claim": { "f1-score": 0.6708741775211494, "precision": 0.761817627325404, "recall": 0.5993282149712092, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9283402681460935, "precision": 0.9236430542778289, "recall": 0.9330855018587361, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9124055491144605, "precision": 0.881839258114374, "recall": 0.94516690135012, "support": 12073.0 }, "eval_accuracy": 0.9166470291196768, "eval_loss": 0.6537314653396606, "eval_macro avg": { "f1-score": 0.8779049986954259, "precision": 0.8918249849294018, "recall": 0.8693951545450163, "support": 29705.0 }, "eval_runtime": 4.8009, "eval_samples_per_second": 16.664, "eval_steps_per_second": 2.083, "eval_weighted avg": { "f1-score": 0.9130268986169812, "precision": 0.9130241066053866, "recall": 0.9166470291196768, "support": 29705.0 }, "step": 2025 }, { "epoch": 26.0, "eval_Claim": { "f1-score": 0.7055278711322285, "precision": 0.682358215646716, "recall": 0.7303262955854126, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9258656751103881, "precision": 0.9260808926080892, "recall": 0.9256505576208178, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9045403093950447, "precision": 0.9157894736842105, "recall": 0.8935641514122422, "support": 12073.0 }, "eval_accuracy": 0.9135162430567244, "eval_loss": 0.6640132665634155, "eval_macro avg": { "f1-score": 0.8839724141856584, "precision": 0.8810350470139681, "recall": 0.8873852511546182, "support": 29705.0 }, "eval_runtime": 4.8275, "eval_samples_per_second": 16.572, "eval_steps_per_second": 2.071, "eval_weighted avg": { "f1-score": 0.9144964914035518, "precision": 0.9158162439956733, "recall": 0.9135162430567244, "support": 29705.0 }, "step": 2106 }, { "epoch": 27.0, "eval_Claim": { "f1-score": 0.6693952997095327, "precision": 0.7442748091603053, "recall": 0.6082053742802304, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9346269346269347, "precision": 0.9292604501607717, "recall": 0.9400557620817844, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9072057227826226, "precision": 0.8811085089773615, "recall": 0.9348960490350369, "support": 12073.0 }, "eval_accuracy": 0.9142231947483589, "eval_loss": 0.6806138753890991, "eval_macro avg": { "f1-score": 0.8778069892797724, "precision": 0.8886609420746096, "recall": 0.8707892963492629, "support": 29705.0 }, "eval_runtime": 4.796, "eval_samples_per_second": 16.68, "eval_steps_per_second": 2.085, "eval_weighted avg": { "f1-score": 0.9111614699094798, "precision": 0.9106725777549173, "recall": 0.9142231947483589, "support": 29705.0 }, "step": 2187 }, { "epoch": 28.0, "eval_Claim": { "f1-score": 0.7062782871068818, "precision": 0.6839739267251068, "recall": 0.7300863723608445, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9229723606705936, "precision": 0.9005305039787799, "recall": 0.9465613382899628, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9056619659019154, "precision": 0.9208183530217429, "recall": 0.8909964383334714, "support": 12073.0 }, "eval_accuracy": 0.9139538798182124, "eval_loss": 0.6814476251602173, "eval_macro avg": { "f1-score": 0.8837281534198478, "precision": 0.8763306959314074, "recall": 0.8919110372460697, "support": 29705.0 }, "eval_runtime": 4.7745, "eval_samples_per_second": 16.756, "eval_steps_per_second": 2.094, "eval_weighted avg": { "f1-score": 0.914864882516695, "precision": 0.9162694814739634, "recall": 0.9139538798182124, "support": 29705.0 }, "step": 2268 }, { "epoch": 29.0, "eval_Claim": { "f1-score": 0.6726794438181135, "precision": 0.7038007863695938, "recall": 0.644193857965451, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9280182232346242, "precision": 0.9101876675603218, "recall": 0.9465613382899628, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9038177945272816, "precision": 0.8940756949509685, "recall": 0.9137745382257931, "support": 12073.0 }, "eval_accuracy": 0.9111597374179431, "eval_loss": 0.7108751535415649, "eval_macro avg": { "f1-score": 0.8761178156712479, "precision": 0.8769939387494352, "recall": 0.8761324336203018, "support": 29705.0 }, "eval_runtime": 4.7653, "eval_samples_per_second": 16.788, "eval_steps_per_second": 2.098, "eval_weighted avg": { "f1-score": 0.9097497186891812, "precision": 0.9088483922476083, "recall": 0.9111597374179431, "support": 29705.0 }, "step": 2349 }, { "epoch": 30.0, "eval_Claim": { "f1-score": 0.6894300778307808, "precision": 0.7230121116377041, "recall": 0.6588291746641075, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.934747521328107, "precision": 0.9276887871853547, "recall": 0.9419144981412639, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9087121676265164, "precision": 0.8963738920225625, "recall": 0.9213948480079516, "support": 12073.0 }, "eval_accuracy": 0.9159737417943107, "eval_loss": 0.6745549440383911, "eval_macro avg": { "f1-score": 0.883222441696351, "precision": 0.8867686977114053, "recall": 0.8805346302033308, "support": 29705.0 }, "eval_runtime": 4.7909, "eval_samples_per_second": 16.698, "eval_steps_per_second": 2.087, "eval_weighted avg": { "f1-score": 0.9145936115149541, "precision": 0.9137795909684305, "recall": 0.9159737417943107, "support": 29705.0 }, "step": 2430 }, { "epoch": 30.86, "grad_norm": 0.013120048679411411, "learning_rate": 7.654320987654322e-06, "loss": 0.0026, "step": 2500 }, { "epoch": 31.0, "eval_Claim": { "f1-score": 0.6823113802674742, "precision": 0.71953166577967, "recall": 0.6487523992322457, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9337597076290544, "precision": 0.9182389937106918, "recall": 0.949814126394052, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9071154317457725, "precision": 0.8948343943911677, "recall": 0.9197382589248737, "support": 12073.0 }, "eval_accuracy": 0.914458845312237, "eval_loss": 0.6794646382331848, "eval_macro avg": { "f1-score": 0.8807966299105753, "precision": 0.8831512634703823, "recall": 0.8795761961377929, "support": 29705.0 }, "eval_runtime": 4.7931, "eval_samples_per_second": 16.691, "eval_steps_per_second": 2.086, "eval_weighted avg": { "f1-score": 0.9128742410785817, "precision": 0.9119809439797892, "recall": 0.914458845312237, "support": 29705.0 }, "step": 2511 }, { "epoch": 32.0, "eval_Claim": { "f1-score": 0.6683053040103493, "precision": 0.7251544076361595, "recall": 0.619721689059501, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9353594919482876, "precision": 0.9136021267168808, "recall": 0.95817843866171, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.904937720615085, "precision": 0.886909495784953, "recall": 0.9237140727242608, "support": 12073.0 }, "eval_accuracy": 0.9126073051674802, "eval_loss": 0.7192577123641968, "eval_macro avg": { "f1-score": 0.8771506291434306, "precision": 0.8814165075344984, "recall": 0.875403550111368, "support": 29705.0 }, "eval_runtime": 4.7949, "eval_samples_per_second": 16.684, "eval_steps_per_second": 2.086, "eval_weighted avg": { "f1-score": 0.9101398160166225, "precision": 0.9092130513494017, "recall": 0.9126073051674802, "support": 29705.0 }, "step": 2592 }, { "epoch": 33.0, "eval_Claim": { "f1-score": 0.7084387215451827, "precision": 0.6801324503311258, "recall": 0.7392034548944337, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.937269372693727, "precision": 0.9304029304029304, "recall": 0.9442379182156134, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9026608285618052, "precision": 0.9178868053771727, "recall": 0.8879317485297772, "support": 12073.0 }, "eval_accuracy": 0.9138192223531392, "eval_loss": 0.699444055557251, "eval_macro avg": { "f1-score": 0.8870922307001787, "precision": 0.8821055465278073, "recall": 0.8928432804099561, "support": 29705.0 }, "eval_runtime": 4.7863, "eval_samples_per_second": 16.715, "eval_steps_per_second": 2.089, "eval_weighted avg": { "f1-score": 0.9149840250686382, "precision": 0.9167031328236271, "recall": 0.9138192223531392, "support": 29705.0 }, "step": 2673 }, { "epoch": 34.0, "eval_Claim": { "f1-score": 0.6974015088013411, "precision": 0.6961510877360746, "recall": 0.6986564299424184, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9402335699564919, "precision": 0.92686230248307, "recall": 0.953996282527881, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9036895462855244, "precision": 0.9066277615673197, "recall": 0.9007703139236313, "support": 12073.0 }, "eval_accuracy": 0.9140548729170174, "eval_loss": 0.7247604131698608, "eval_macro avg": { "f1-score": 0.8853311562608394, "precision": 0.882410287946616, "recall": 0.8883557565984826, "support": 29705.0 }, "eval_runtime": 4.8212, "eval_samples_per_second": 16.593, "eval_steps_per_second": 2.074, "eval_weighted avg": { "f1-score": 0.9140682047983671, "precision": 0.9141182418458097, "recall": 0.9140548729170174, "support": 29705.0 }, "step": 2754 }, { "epoch": 35.0, "eval_Claim": { "f1-score": 0.6959401447320005, "precision": 0.7119196988707653, "recall": 0.680662188099808, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.9385984934946359, "precision": 0.9223867205024675, "recall": 0.9553903345724907, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.9068492020947589, "precision": 0.9029397273772376, "recall": 0.9107926778762528, "support": 12073.0 }, "eval_accuracy": 0.9157044268641643, "eval_loss": 0.7355715036392212, "eval_macro avg": { "f1-score": 0.885335910356592, "precision": 0.8842894382168318, "recall": 0.8867113001371378, "support": 29705.0 }, "eval_runtime": 4.8984, "eval_samples_per_second": 16.332, "eval_steps_per_second": 2.041, "eval_weighted avg": { "f1-score": 0.9150120491578154, "precision": 0.914473958742095, "recall": 0.9157044268641643, "support": 29705.0 }, "step": 2835 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 5032605645438000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }