{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 648, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.5385059461955894, "precision": 0.6267132491265789, "recall": 0.4720647773279352, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.7814784727863526, "precision": 0.9588039867109635, "recall": 0.6595063985374772, "support": 2188.0 }, "eval_O": { "f1-score": 0.9938608715302625, "precision": 0.9996969237763298, "recall": 0.9880925634688834, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.8968837690149166, "precision": 0.8455112497215416, "recall": 0.954902824076986, "support": 15899.0 }, "eval_accuracy": 0.8837548103353491, "eval_loss": 0.2860471308231354, "eval_macro avg": { "f1-score": 0.8026822648817803, "precision": 0.8576813523338533, "recall": 0.7686416408528204, "support": 36380.0 }, "eval_runtime": 4.9572, "eval_samples_per_second": 16.34, "eval_steps_per_second": 2.219, "eval_weighted avg": { "f1-score": 0.8768739289107891, "precision": 0.8792073379911217, "recall": 0.8837548103353491, "support": 36380.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.6469459868753155, "precision": 0.6453172205438067, "recall": 0.648582995951417, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8435970110796187, "precision": 0.9669226225634967, "recall": 0.7481718464351006, "support": 2188.0 }, "eval_O": { "f1-score": 0.998877245508982, "precision": 0.9983541557567143, "recall": 0.9994008836965476, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9099026477336144, "precision": 0.8972179761540813, "recall": 0.922951129001824, "support": 15899.0 }, "eval_accuracy": 0.903243540406817, "eval_loss": 0.24885737895965576, "eval_macro avg": { "f1-score": 0.8498307227993828, "precision": 0.8769529937545246, "recall": 0.8297767137712222, "support": 36380.0 }, "eval_runtime": 5.0097, "eval_samples_per_second": 16.169, "eval_steps_per_second": 2.196, "eval_weighted avg": { "f1-score": 0.9028656814459712, "precision": 0.9043261520711511, "recall": 0.903243540406817, "support": 36380.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.7040630102775911, "precision": 0.6698958142935478, "recall": 0.7419028340080972, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8888411094388303, "precision": 0.8392204628501827, "recall": 0.9446983546617916, "support": 2188.0 }, "eval_O": { "f1-score": 0.998989104796136, "precision": 0.9988769092542678, "recall": 0.9991013255448213, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.914582593823615, "precision": 0.9390987408880053, "recall": 0.8913139191144097, "support": 15899.0 }, "eval_accuracy": 0.9137987905442551, "eval_loss": 0.24298855662345886, "eval_macro avg": { "f1-score": 0.8766189545840432, "precision": 0.8617729818215009, "recall": 0.8942541083322799, "support": 36380.0 }, "eval_runtime": 4.9876, "eval_samples_per_second": 16.24, "eval_steps_per_second": 2.205, "eval_weighted avg": { "f1-score": 0.9154289662937856, "precision": 0.9184781183611038, "recall": 0.9137987905442551, "support": 36380.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6981611893583725, "precision": 0.6754352763058289, "recall": 0.7224696356275304, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.901231802911534, "precision": 0.8836187966622749, "recall": 0.9195612431444241, "support": 2188.0 }, "eval_O": { "f1-score": 0.9989130842172332, "precision": 0.9998499399759904, "recall": 0.9979779824758481, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9146224912392481, "precision": 0.9266670970240785, "recall": 0.9028869740235235, "support": 15899.0 }, "eval_accuracy": 0.9142935678944475, "eval_loss": 0.23980534076690674, "eval_macro avg": { "f1-score": 0.8782321419315969, "precision": 0.8713927774920431, "recall": 0.8857239588178316, "support": 36380.0 }, "eval_runtime": 4.9726, "eval_samples_per_second": 16.289, "eval_steps_per_second": 2.212, "eval_weighted avg": { "f1-score": 0.9153623106642749, "precision": 0.9168247558035478, "recall": 0.9142935678944475, "support": 36380.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6650318783717509, "precision": 0.6450999048525214, "recall": 0.6862348178137652, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9172031076581575, "precision": 0.8916702632714717, "recall": 0.9442413162705667, "support": 2188.0 }, "eval_O": { "f1-score": 0.9982383147794145, "precision": 0.9992495872730002, "recall": 0.9972290870965326, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.8998438545616774, "precision": 0.9119622787753521, "recall": 0.8880432731618341, "support": 15899.0 }, "eval_accuracy": 0.9040956569543706, "eval_loss": 0.2932502031326294, "eval_macro avg": { "f1-score": 0.87007928884275, "precision": 0.8619955085430864, "recall": 0.8789371235856747, "support": 36380.0 }, "eval_runtime": 4.9595, "eval_samples_per_second": 16.332, "eval_steps_per_second": 2.218, "eval_weighted avg": { "f1-score": 0.9051179642561333, "precision": 0.9065430476942037, "recall": 0.9040956569543706, "support": 36380.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.6622698072805139, "precision": 0.5740163325909429, "recall": 0.7825910931174089, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9223704351629127, "precision": 0.8842767295597485, "recall": 0.9638939670932358, "support": 2188.0 }, "eval_O": { "f1-score": 0.9982004948639124, "precision": 0.9993994444861497, "recall": 0.9970044184827379, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.8780749379985253, "precision": 0.9398091685199799, "recall": 0.8239511918988616, "support": 15899.0 }, "eval_accuracy": 0.8902693787795491, "eval_loss": 0.3604726195335388, "eval_macro avg": { "f1-score": 0.865228918826466, "precision": 0.8493754187892053, "recall": 0.8918601676480611, "support": 36380.0 }, "eval_runtime": 4.991, "eval_samples_per_second": 16.229, "eval_steps_per_second": 2.204, "eval_weighted avg": { "f1-score": 0.8955262233990811, "precision": 0.9086708334194257, "recall": 0.8902693787795491, "support": 36380.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 17.04454231262207, "learning_rate": 1.7530864197530865e-05, "loss": 0.2104, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.7037529399734124, "precision": 0.7110973341599504, "recall": 0.6965587044534413, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9196885428253615, "precision": 0.8959687906371911, "recall": 0.9446983546617916, "support": 2188.0 }, "eval_O": { "f1-score": 0.9987257327036954, "precision": 0.9996248780853777, "recall": 0.997828203399985, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9188781285372909, "precision": 0.9187048098082364, "recall": 0.9190515126737531, "support": 15899.0 }, "eval_accuracy": 0.9192963166575041, "eval_loss": 0.36414769291877747, "eval_macro avg": { "f1-score": 0.8852613360099401, "precision": 0.8813489531726889, "recall": 0.8895341937972427, "support": 36380.0 }, "eval_runtime": 4.9835, "eval_samples_per_second": 16.254, "eval_steps_per_second": 2.207, "eval_weighted avg": { "f1-score": 0.9190227083446231, "precision": 0.9188476996393494, "recall": 0.9192963166575041, "support": 36380.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.7025399811853246, "precision": 0.6562390158172232, "recall": 0.7558704453441295, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9105835367206567, "precision": 0.8848641655886158, "recall": 0.9378427787934186, "support": 2188.0 }, "eval_O": { "f1-score": 0.9986513823331086, "precision": 0.9991005172026085, "recall": 0.9982026510896428, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9083384525849526, "precision": 0.9345974717232203, "recall": 0.8835146864582678, "support": 15899.0 }, "eval_accuracy": 0.911544804837823, "eval_loss": 0.35222452878952026, "eval_macro avg": { "f1-score": 0.8800283382060107, "precision": 0.8687002925829169, "recall": 0.8938576404213647, "support": 36380.0 }, "eval_runtime": 4.9741, "eval_samples_per_second": 16.285, "eval_steps_per_second": 2.211, "eval_weighted avg": { "f1-score": 0.9136769997620249, "precision": 0.9174837257168467, "recall": 0.911544804837823, "support": 36380.0 }, "step": 648 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1146737470939200.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }