{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.3650793650793651, "precision": 0.45800144822592326, "recall": 0.30350287907869483, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.652992277992278, "precision": 0.6792168674698795, "recall": 0.6287174721189591, "support": 2152.0 }, "eval_O": { "f1-score": 0.9929261022378432, "precision": 0.9994626063591581, "recall": 0.986474540311174, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.873351637727677, "precision": 0.8190918322936313, "recall": 0.9353101963058064, "support": 12073.0 }, "eval_accuracy": 0.8439319979801381, "eval_loss": 0.3479689359664917, "eval_macro avg": { "f1-score": 0.7210873457592908, "precision": 0.7389431885871479, "recall": 0.7135012719536586, "support": 29705.0 }, "eval_runtime": 1.3927, "eval_samples_per_second": 57.443, "eval_steps_per_second": 7.18, "eval_weighted avg": { "f1-score": 0.8316056073620907, "precision": 0.8269800178224755, "recall": 0.8439319979801381, "support": 29705.0 }, "step": 41 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.427892234548336, "precision": 0.6302521008403361, "recall": 0.32389635316698656, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.7983248842847697, "precision": 0.7593291404612159, "recall": 0.841542750929368, "support": 2152.0 }, "eval_O": { "f1-score": 0.9990268931351733, "precision": 0.9997344192634561, "recall": 0.9983203677510608, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8914659988441533, "precision": 0.833381357153148, "recall": 0.9582539551064359, "support": 12073.0 }, "eval_accuracy": 0.876047803400101, "eval_loss": 0.2758464217185974, "eval_macro avg": { "f1-score": 0.779177502703108, "precision": 0.805674254429539, "recall": 0.7805033567384627, "support": 29705.0 }, "eval_runtime": 1.3892, "eval_samples_per_second": 57.586, "eval_steps_per_second": 7.198, "eval_weighted avg": { "f1-score": 0.8606332672536217, "precision": 0.8628640276786139, "recall": 0.876047803400101, "support": 29705.0 }, "step": 82 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.5882352941176471, "precision": 0.620671283963772, "recall": 0.559021113243762, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.822195416164053, "precision": 0.8549924736578023, "recall": 0.79182156133829, "support": 2152.0 }, "eval_O": { "f1-score": 0.999557835160948, "precision": 0.9999115357395613, "recall": 0.9992043847241867, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.897156953936992, "precision": 0.8765607712976135, "recall": 0.918744305475027, "support": 12073.0 }, "eval_accuracy": 0.8897155361050328, "eval_loss": 0.2410457581281662, "eval_macro avg": { "f1-score": 0.8267863748449099, "precision": 0.8380340161646872, "recall": 0.8171978411953165, "support": 29705.0 }, "eval_runtime": 1.3926, "eval_samples_per_second": 57.447, "eval_steps_per_second": 7.181, "eval_weighted avg": { "f1-score": 0.8873759763571568, "precision": 0.8860669651248813, "recall": 0.8897155361050328, "support": 29705.0 }, "step": 123 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6018413419094463, "precision": 0.6344057431534167, "recall": 0.5724568138195777, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8690502303136654, "precision": 0.8230162027420025, "recall": 0.9205390334572491, "support": 2152.0 }, "eval_O": { "f1-score": 0.9995578742594394, "precision": 0.9998231027772864, "recall": 0.9992927864214993, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8916227781435153, "precision": 0.8858637887335459, "recall": 0.8974571357574753, "support": 12073.0 }, "eval_accuracy": 0.8923076923076924, "eval_loss": 0.24871428310871124, "eval_macro avg": { "f1-score": 0.8405180561565165, "precision": 0.8357772093515627, "recall": 0.8474364423639503, "support": 29705.0 }, "eval_runtime": 1.3944, "eval_samples_per_second": 57.373, "eval_steps_per_second": 7.172, "eval_weighted avg": { "f1-score": 0.8904302737876794, "precision": 0.8894248936462209, "recall": 0.8923076923076924, "support": 29705.0 }, "step": 164 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6216759248315801, "precision": 0.6126252038201724, "recall": 0.6309980806142035, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8738404452690167, "precision": 0.8722222222222222, "recall": 0.8754646840148699, "support": 2152.0 }, "eval_O": { "f1-score": 0.9996462681287585, "precision": 1.0, "recall": 0.9992927864214993, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8883893260064111, "precision": 0.8930364914630063, "recall": 0.8837902758220824, "support": 12073.0 }, "eval_accuracy": 0.8917017337148628, "eval_loss": 0.2594112753868103, "eval_macro avg": { "f1-score": 0.8458879910589416, "precision": 0.8444709793763502, "recall": 0.8473864567181638, "support": 29705.0 }, "eval_runtime": 1.3912, "eval_samples_per_second": 57.505, "eval_steps_per_second": 7.188, "eval_weighted avg": { "f1-score": 0.8922798455096741, "precision": 0.8929161297147813, "recall": 0.8917017337148628, "support": 29705.0 }, "step": 205 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.6567796610169492, "precision": 0.5880121396054628, "recall": 0.7437619961612284, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8874388254486133, "precision": 0.8901355773726041, "recall": 0.8847583643122676, "support": 2152.0 }, "eval_O": { "f1-score": 0.9997789469030461, "precision": 1.0, "recall": 0.999557991513437, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8790980052038161, "precision": 0.9225448257031037, "recall": 0.8395593473039012, "support": 12073.0 }, "eval_accuracy": 0.8903214946978623, "eval_loss": 0.2811674177646637, "eval_macro avg": { "f1-score": 0.8557738596431061, "precision": 0.8501731356702926, "recall": 0.8669094248227085, "support": 29705.0 }, "eval_runtime": 1.3897, "eval_samples_per_second": 57.567, "eval_steps_per_second": 7.196, "eval_weighted avg": { "f1-score": 0.8944647582453118, "precision": 0.9027534099005212, "recall": 0.8903214946978623, "support": 29705.0 }, "step": 246 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.6388127853881279, "precision": 0.6093205574912892, "recall": 0.6713051823416507, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8804010503700167, "precision": 0.905252822778596, "recall": 0.8568773234200744, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999115904871364, "precision": 1.0, "recall": 0.9998231966053748, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.886362682998448, "precision": 0.8979262281149074, "recall": 0.8750931831359231, "support": 12073.0 }, "eval_accuracy": 0.8926780003366437, "eval_loss": 0.30266591906547546, "eval_macro avg": { "f1-score": 0.8513720273109323, "precision": 0.8531249020961982, "recall": 0.8507747213757557, "support": 29705.0 }, "eval_runtime": 1.3903, "eval_samples_per_second": 57.544, "eval_steps_per_second": 7.193, "eval_weighted avg": { "f1-score": 0.894437008359695, "precision": 0.8968327052777144, "recall": 0.8926780003366437, "support": 29705.0 }, "step": 287 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.64190800681431, "precision": 0.6094457623463446, "recall": 0.6780230326295585, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8679334916864608, "precision": 0.8877551020408163, "recall": 0.8489776951672863, "support": 2152.0 }, "eval_O": { "f1-score": 0.9996904982977407, "precision": 1.0, "recall": 0.9993811881188119, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8887206661619985, "precision": 0.9026911576249466, "recall": 0.875176012590077, "support": 12073.0 }, "eval_accuracy": 0.8929136509005218, "eval_loss": 0.3308269679546356, "eval_macro avg": { "f1-score": 0.8495631657401276, "precision": 0.8499730055030268, "recall": 0.8503894821264335, "support": 29705.0 }, "eval_runtime": 1.3922, "eval_samples_per_second": 57.462, "eval_steps_per_second": 7.183, "eval_weighted avg": { "f1-score": 0.8948422476293271, "precision": 0.8975192480409824, "recall": 0.8929136509005218, "support": 29705.0 }, "step": 328 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.63868836412578, "precision": 0.651685393258427, "recall": 0.6261996161228407, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8941736028537456, "precision": 0.9157330735509012, "recall": 0.8736059479553904, "support": 2152.0 }, "eval_O": { "f1-score": 0.9996904982977407, "precision": 1.0, "recall": 0.9993811881188119, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8962523039115298, "precision": 0.8864851725814292, "recall": 0.9062370578977884, "support": 12073.0 }, "eval_accuracy": 0.9000504965494025, "eval_loss": 0.34075838327407837, "eval_macro avg": { "f1-score": 0.857201192297199, "precision": 0.8634759098476893, "recall": 0.8513559525237079, "support": 29705.0 }, "eval_runtime": 1.3948, "eval_samples_per_second": 57.357, "eval_steps_per_second": 7.17, "eval_weighted avg": { "f1-score": 0.8993525560304815, "precision": 0.8988863080948749, "recall": 0.9000504965494025, "support": 29705.0 }, "step": 369 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.6206814955040227, "precision": 0.6122782446311859, "recall": 0.6293186180422264, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8776916451335055, "precision": 0.8178170144462279, "recall": 0.9470260223048327, "support": 2152.0 }, "eval_O": { "f1-score": 0.9999558011049724, "precision": 0.9999116061168567, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8864029718434716, "precision": 0.9038395316804407, "recall": 0.869626439161766, "support": 12073.0 }, "eval_accuracy": 0.89116310385457, "eval_loss": 0.4050362706184387, "eval_macro avg": { "f1-score": 0.8461829783964931, "precision": 0.8334615992186778, "recall": 0.8614927698772062, "support": 29705.0 }, "eval_runtime": 1.3901, "eval_samples_per_second": 57.551, "eval_steps_per_second": 7.194, "eval_weighted avg": { "f1-score": 0.8917298769484514, "precision": 0.8932830396594146, "recall": 0.89116310385457, "support": 29705.0 }, "step": 410 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6361489146327746, "precision": 0.5987719669701461, "recall": 0.6785028790786948, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8838862559241706, "precision": 0.9018375241779497, "recall": 0.866635687732342, "support": 2152.0 }, "eval_O": { "f1-score": 1.0, "precision": 1.0, "recall": 1.0, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8829567053854277, "precision": 0.90087915876573, "recall": 0.8657334548165327, "support": 12073.0 }, "eval_accuracy": 0.8906581383605454, "eval_loss": 0.4124489724636078, "eval_macro avg": { "f1-score": 0.8507479689855932, "precision": 0.8503721624784565, "recall": 0.8527180054068924, "support": 29705.0 }, "eval_runtime": 1.3986, "eval_samples_per_second": 57.198, "eval_steps_per_second": 7.15, "eval_weighted avg": { "f1-score": 0.8929650968879477, "precision": 0.8963053356048198, "recall": 0.8906581383605454, "support": 29705.0 }, "step": 451 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.6082852648138437, "precision": 0.6705202312138728, "recall": 0.5566218809980806, "support": 4168.0 }, "eval_MajorClaim": { "f1-score": 0.8887310606060606, "precision": 0.9058880308880309, "recall": 0.8722118959107806, "support": 2152.0 }, "eval_O": { "f1-score": 0.9996462994075515, "precision": 0.9999115513886432, "recall": 0.9993811881188119, "support": 11312.0 }, "eval_Premise": { "f1-score": 0.8976744186046511, "precision": 0.8699774617237895, "recall": 0.9271929097987244, "support": 12073.0 }, "eval_accuracy": 0.8987039218986702, "eval_loss": 0.4420628547668457, "eval_macro avg": { "f1-score": 0.8485842608580267, "precision": 0.861574318803584, "recall": 0.8388519687065994, "support": 29705.0 }, "eval_runtime": 1.3926, "eval_samples_per_second": 57.446, "eval_steps_per_second": 7.181, "eval_weighted avg": { "f1-score": 0.8952534731823099, "precision": 0.8940729416216161, "recall": 0.8987039218986702, "support": 29705.0 }, "step": 492 } ], "logging_steps": 500, "max_steps": 656, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 1725464792721600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }