{ "best_metric": null, "best_model_checkpoint": null, "epoch": 19.0, "eval_steps": 500, "global_step": 1539, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Claim": { "f1-score": 0.5385059461955894, "precision": 0.6267132491265789, "recall": 0.4720647773279352, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.7814784727863526, "precision": 0.9588039867109635, "recall": 0.6595063985374772, "support": 2188.0 }, "eval_O": { "f1-score": 0.9938608715302625, "precision": 0.9996969237763298, "recall": 0.9880925634688834, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.8968837690149166, "precision": 0.8455112497215416, "recall": 0.954902824076986, "support": 15899.0 }, "eval_accuracy": 0.8837548103353491, "eval_loss": 0.2860471308231354, "eval_macro avg": { "f1-score": 0.8026822648817803, "precision": 0.8576813523338533, "recall": 0.7686416408528204, "support": 36380.0 }, "eval_runtime": 4.9572, "eval_samples_per_second": 16.34, "eval_steps_per_second": 2.219, "eval_weighted avg": { "f1-score": 0.8768739289107891, "precision": 0.8792073379911217, "recall": 0.8837548103353491, "support": 36380.0 }, "step": 81 }, { "epoch": 2.0, "eval_Claim": { "f1-score": 0.6469459868753155, "precision": 0.6453172205438067, "recall": 0.648582995951417, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8435970110796187, "precision": 0.9669226225634967, "recall": 0.7481718464351006, "support": 2188.0 }, "eval_O": { "f1-score": 0.998877245508982, "precision": 0.9983541557567143, "recall": 0.9994008836965476, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9099026477336144, "precision": 0.8972179761540813, "recall": 0.922951129001824, "support": 15899.0 }, "eval_accuracy": 0.903243540406817, "eval_loss": 0.24885737895965576, "eval_macro avg": { "f1-score": 0.8498307227993828, "precision": 0.8769529937545246, "recall": 0.8297767137712222, "support": 36380.0 }, "eval_runtime": 5.0097, "eval_samples_per_second": 16.169, "eval_steps_per_second": 2.196, "eval_weighted avg": { "f1-score": 0.9028656814459712, "precision": 0.9043261520711511, "recall": 0.903243540406817, "support": 36380.0 }, "step": 162 }, { "epoch": 3.0, "eval_Claim": { "f1-score": 0.7040630102775911, "precision": 0.6698958142935478, "recall": 0.7419028340080972, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8888411094388303, "precision": 0.8392204628501827, "recall": 0.9446983546617916, "support": 2188.0 }, "eval_O": { "f1-score": 0.998989104796136, "precision": 0.9988769092542678, "recall": 0.9991013255448213, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.914582593823615, "precision": 0.9390987408880053, "recall": 0.8913139191144097, "support": 15899.0 }, "eval_accuracy": 0.9137987905442551, "eval_loss": 0.24298855662345886, "eval_macro avg": { "f1-score": 0.8766189545840432, "precision": 0.8617729818215009, "recall": 0.8942541083322799, "support": 36380.0 }, "eval_runtime": 4.9876, "eval_samples_per_second": 16.24, "eval_steps_per_second": 2.205, "eval_weighted avg": { "f1-score": 0.9154289662937856, "precision": 0.9184781183611038, "recall": 0.9137987905442551, "support": 36380.0 }, "step": 243 }, { "epoch": 4.0, "eval_Claim": { "f1-score": 0.6981611893583725, "precision": 0.6754352763058289, "recall": 0.7224696356275304, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.901231802911534, "precision": 0.8836187966622749, "recall": 0.9195612431444241, "support": 2188.0 }, "eval_O": { "f1-score": 0.9989130842172332, "precision": 0.9998499399759904, "recall": 0.9979779824758481, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9146224912392481, "precision": 0.9266670970240785, "recall": 0.9028869740235235, "support": 15899.0 }, "eval_accuracy": 0.9142935678944475, "eval_loss": 0.23980534076690674, "eval_macro avg": { "f1-score": 0.8782321419315969, "precision": 0.8713927774920431, "recall": 0.8857239588178316, "support": 36380.0 }, "eval_runtime": 4.9726, "eval_samples_per_second": 16.289, "eval_steps_per_second": 2.212, "eval_weighted avg": { "f1-score": 0.9153623106642749, "precision": 0.9168247558035478, "recall": 0.9142935678944475, "support": 36380.0 }, "step": 324 }, { "epoch": 5.0, "eval_Claim": { "f1-score": 0.6650318783717509, "precision": 0.6450999048525214, "recall": 0.6862348178137652, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9172031076581575, "precision": 0.8916702632714717, "recall": 0.9442413162705667, "support": 2188.0 }, "eval_O": { "f1-score": 0.9982383147794145, "precision": 0.9992495872730002, "recall": 0.9972290870965326, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.8998438545616774, "precision": 0.9119622787753521, "recall": 0.8880432731618341, "support": 15899.0 }, "eval_accuracy": 0.9040956569543706, "eval_loss": 0.2932502031326294, "eval_macro avg": { "f1-score": 0.87007928884275, "precision": 0.8619955085430864, "recall": 0.8789371235856747, "support": 36380.0 }, "eval_runtime": 4.9595, "eval_samples_per_second": 16.332, "eval_steps_per_second": 2.218, "eval_weighted avg": { "f1-score": 0.9051179642561333, "precision": 0.9065430476942037, "recall": 0.9040956569543706, "support": 36380.0 }, "step": 405 }, { "epoch": 6.0, "eval_Claim": { "f1-score": 0.6622698072805139, "precision": 0.5740163325909429, "recall": 0.7825910931174089, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9223704351629127, "precision": 0.8842767295597485, "recall": 0.9638939670932358, "support": 2188.0 }, "eval_O": { "f1-score": 0.9982004948639124, "precision": 0.9993994444861497, "recall": 0.9970044184827379, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.8780749379985253, "precision": 0.9398091685199799, "recall": 0.8239511918988616, "support": 15899.0 }, "eval_accuracy": 0.8902693787795491, "eval_loss": 0.3604726195335388, "eval_macro avg": { "f1-score": 0.865228918826466, "precision": 0.8493754187892053, "recall": 0.8918601676480611, "support": 36380.0 }, "eval_runtime": 4.991, "eval_samples_per_second": 16.229, "eval_steps_per_second": 2.204, "eval_weighted avg": { "f1-score": 0.8955262233990811, "precision": 0.9086708334194257, "recall": 0.8902693787795491, "support": 36380.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 17.04454231262207, "learning_rate": 1.7530864197530865e-05, "loss": 0.2104, "step": 500 }, { "epoch": 7.0, "eval_Claim": { "f1-score": 0.7037529399734124, "precision": 0.7110973341599504, "recall": 0.6965587044534413, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9196885428253615, "precision": 0.8959687906371911, "recall": 0.9446983546617916, "support": 2188.0 }, "eval_O": { "f1-score": 0.9987257327036954, "precision": 0.9996248780853777, "recall": 0.997828203399985, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9188781285372909, "precision": 0.9187048098082364, "recall": 0.9190515126737531, "support": 15899.0 }, "eval_accuracy": 0.9192963166575041, "eval_loss": 0.36414769291877747, "eval_macro avg": { "f1-score": 0.8852613360099401, "precision": 0.8813489531726889, "recall": 0.8895341937972427, "support": 36380.0 }, "eval_runtime": 4.9835, "eval_samples_per_second": 16.254, "eval_steps_per_second": 2.207, "eval_weighted avg": { "f1-score": 0.9190227083446231, "precision": 0.9188476996393494, "recall": 0.9192963166575041, "support": 36380.0 }, "step": 567 }, { "epoch": 8.0, "eval_Claim": { "f1-score": 0.7025399811853246, "precision": 0.6562390158172232, "recall": 0.7558704453441295, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9105835367206567, "precision": 0.8848641655886158, "recall": 0.9378427787934186, "support": 2188.0 }, "eval_O": { "f1-score": 0.9986513823331086, "precision": 0.9991005172026085, "recall": 0.9982026510896428, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9083384525849526, "precision": 0.9345974717232203, "recall": 0.8835146864582678, "support": 15899.0 }, "eval_accuracy": 0.911544804837823, "eval_loss": 0.35222452878952026, "eval_macro avg": { "f1-score": 0.8800283382060107, "precision": 0.8687002925829169, "recall": 0.8938576404213647, "support": 36380.0 }, "eval_runtime": 4.9741, "eval_samples_per_second": 16.285, "eval_steps_per_second": 2.211, "eval_weighted avg": { "f1-score": 0.9136769997620249, "precision": 0.9174837257168467, "recall": 0.911544804837823, "support": 36380.0 }, "step": 648 }, { "epoch": 9.0, "eval_Claim": { "f1-score": 0.6566933693658913, "precision": 0.6275594908688434, "recall": 0.6886639676113361, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.8702745722244328, "precision": 0.7706131078224101, "recall": 0.9995429616087751, "support": 2188.0 }, "eval_O": { "f1-score": 0.9987648313807688, "precision": 0.9983537862915295, "recall": 0.9991762150827529, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9001826722338205, "precision": 0.9350138917124077, "recall": 0.8678533241084345, "support": 15899.0 }, "eval_accuracy": 0.8996426608026388, "eval_loss": 0.4436376690864563, "eval_macro avg": { "f1-score": 0.8564788613012284, "precision": 0.8328850691737977, "recall": 0.8888091171028246, "support": 36380.0 }, "eval_runtime": 5.0402, "eval_samples_per_second": 16.071, "eval_steps_per_second": 2.182, "eval_weighted avg": { "f1-score": 0.9015045934020747, "precision": 0.90662587513727, "recall": 0.8996426608026388, "support": 36380.0 }, "step": 729 }, { "epoch": 10.0, "eval_Claim": { "f1-score": 0.7371103705155853, "precision": 0.7145572025845686, "recall": 0.7611336032388664, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9365328549002018, "precision": 0.9194187582562747, "recall": 0.9542961608775137, "support": 2188.0 }, "eval_O": { "f1-score": 0.9991386090408599, "precision": 0.9993257416841475, "recall": 0.9989515464689583, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9225428371233836, "precision": 0.9344473837021743, "recall": 0.9109377948298635, "support": 15899.0 }, "eval_accuracy": 0.9255085211654756, "eval_loss": 0.44094258546829224, "eval_macro avg": { "f1-score": 0.8988311678950076, "precision": 0.8919372715567913, "recall": 0.9063297763538004, "support": 36380.0 }, "eval_runtime": 4.9458, "eval_samples_per_second": 16.378, "eval_steps_per_second": 2.224, "eval_weighted avg": { "f1-score": 0.9263184587909816, "precision": 0.927497977075915, "recall": 0.9255085211654756, "support": 36380.0 }, "step": 810 }, { "epoch": 11.0, "eval_Claim": { "f1-score": 0.6750768578394996, "precision": 0.7086579123080348, "recall": 0.6445344129554655, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9008264462809917, "precision": 0.8220211161387632, "recall": 0.9963436928702011, "support": 2188.0 }, "eval_O": { "f1-score": 0.9990631440884392, "precision": 0.9998499849984999, "recall": 0.9982775406275743, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9168605748066159, "precision": 0.9167452681883921, "recall": 0.9169759104346186, "support": 15899.0 }, "eval_accuracy": 0.9145959318306762, "eval_loss": 0.4954843521118164, "eval_macro avg": { "f1-score": 0.8729567557538865, "precision": 0.8618185704084225, "recall": 0.8890328892219649, "support": 36380.0 }, "eval_runtime": 5.0503, "eval_samples_per_second": 16.039, "eval_steps_per_second": 2.178, "eval_weighted avg": { "f1-score": 0.9132365141301059, "precision": 0.9132952764575458, "recall": 0.9145959318306762, "support": 36380.0 }, "step": 891 }, { "epoch": 12.0, "eval_Claim": { "f1-score": 0.6857200770244248, "precision": 0.6866247209255125, "recall": 0.6848178137651821, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9214571488734471, "precision": 0.8543537680593518, "recall": 1.0, "support": 2188.0 }, "eval_O": { "f1-score": 0.9987652935234033, "precision": 0.9979811574697174, "recall": 0.9995506627724107, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9127542413343095, "precision": 0.9239592731022039, "recall": 0.9018177243851815, "support": 15899.0 }, "eval_accuracy": 0.91412864211105, "eval_loss": 0.5210011005401611, "eval_macro avg": { "f1-score": 0.8796741901888963, "precision": 0.8657297298891964, "recall": 0.8965465502306935, "support": 36380.0 }, "eval_runtime": 4.9497, "eval_samples_per_second": 16.365, "eval_steps_per_second": 2.222, "eval_weighted avg": { "f1-score": 0.9140186385274314, "precision": 0.9147147620844138, "recall": 0.91412864211105, "support": 36380.0 }, "step": 972 }, { "epoch": 12.35, "grad_norm": 3.2667109966278076, "learning_rate": 1.506172839506173e-05, "loss": 0.0286, "step": 1000 }, { "epoch": 13.0, "eval_Claim": { "f1-score": 0.7024380291747424, "precision": 0.7079991774624718, "recall": 0.6969635627530364, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9075119409465915, "precision": 0.8643507030603805, "recall": 0.9552102376599635, "support": 2188.0 }, "eval_O": { "f1-score": 0.9986895802912875, "precision": 0.9985774183887391, "recall": 0.9988017673930952, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9200429808482398, "precision": 0.9246014101505431, "recall": 0.9155292785709793, "support": 15899.0 }, "eval_accuracy": 0.9188015393073117, "eval_loss": 0.5909315347671509, "eval_macro avg": { "f1-score": 0.8821706328152152, "precision": 0.8738821772655336, "recall": 0.8916262115942686, "support": 36380.0 }, "eval_runtime": 4.9699, "eval_samples_per_second": 16.298, "eval_steps_per_second": 2.213, "eval_weighted avg": { "f1-score": 0.9186076225687217, "precision": 0.918717904416961, "recall": 0.9188015393073117, "support": 36380.0 }, "step": 1053 }, { "epoch": 14.0, "eval_Claim": { "f1-score": 0.7095000509113125, "precision": 0.7137881581643106, "recall": 0.7052631578947368, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.921028466483012, "precision": 0.9252767527675276, "recall": 0.916819012797075, "support": 2188.0 }, "eval_O": { "f1-score": 0.9994754982766372, "precision": 1.0, "recall": 0.9989515464689583, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9206986297074409, "precision": 0.9180215107553776, "recall": 0.9233914082646707, "support": 15899.0 }, "eval_accuracy": 0.9211105002748763, "eval_loss": 0.559357225894928, "eval_macro avg": { "f1-score": 0.8876756613446006, "precision": 0.889271605421804, "recall": 0.8861062813563602, "support": 36380.0 }, "eval_runtime": 4.9811, "eval_samples_per_second": 16.262, "eval_steps_per_second": 2.208, "eval_weighted avg": { "f1-score": 0.9209544908843665, "precision": 0.9208148168193181, "recall": 0.9211105002748763, "support": 36380.0 }, "step": 1134 }, { "epoch": 15.0, "eval_Claim": { "f1-score": 0.6915445526531356, "precision": 0.6282454109475774, "recall": 0.7690283400809717, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9327135900841081, "precision": 0.9042918454935622, "recall": 0.9629798903107861, "support": 2188.0 }, "eval_O": { "f1-score": 0.9995130539011873, "precision": 0.9998501199040767, "recall": 0.9991762150827529, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.8984226716408141, "precision": 0.936421311139914, "recall": 0.8633876344424177, "support": 15899.0 }, "eval_accuracy": 0.9064046179219352, "eval_loss": 0.6213898658752441, "eval_macro avg": { "f1-score": 0.8805484670698113, "precision": 0.8672021718712826, "recall": 0.898643019979232, "support": 36380.0 }, "eval_runtime": 4.9756, "eval_samples_per_second": 16.28, "eval_steps_per_second": 2.211, "eval_weighted avg": { "f1-score": 0.9094977265082566, "precision": 0.9159231436204942, "recall": 0.9064046179219352, "support": 36380.0 }, "step": 1215 }, { "epoch": 16.0, "eval_Claim": { "f1-score": 0.6819089456869009, "precision": 0.6727738376674547, "recall": 0.6912955465587044, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9290586630286495, "precision": 0.9244343891402715, "recall": 0.9337294332723949, "support": 2188.0 }, "eval_O": { "f1-score": 0.9998127410958391, "precision": 1.0, "recall": 0.9996255523103422, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9087691578448412, "precision": 0.9131843007748, "recall": 0.9043965029247123, "support": 15899.0 }, "eval_accuracy": 0.9121770203408466, "eval_loss": 0.6214332580566406, "eval_macro avg": { "f1-score": 0.8798873769140577, "precision": 0.8775981318956315, "recall": 0.8822617587665385, "support": 36380.0 }, "eval_runtime": 4.962, "eval_samples_per_second": 16.324, "eval_steps_per_second": 2.217, "eval_weighted avg": { "f1-score": 0.9126011797368843, "precision": 0.9130808795913876, "recall": 0.9121770203408466, "support": 36380.0 }, "step": 1296 }, { "epoch": 17.0, "eval_Claim": { "f1-score": 0.6977198697068404, "precision": 0.7524590163934426, "recall": 0.6504048582995952, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.935002195871761, "precision": 0.8998309382924767, "recall": 0.9730347349177331, "support": 2188.0 }, "eval_O": { "f1-score": 0.9994757732344791, "precision": 0.9994757732344791, "recall": 0.9994757732344791, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9219572623103128, "precision": 0.908120309926179, "recall": 0.9362224039247752, "support": 15899.0 }, "eval_accuracy": 0.9228422210005498, "eval_loss": 0.6075455546379089, "eval_macro avg": { "f1-score": 0.8885387752808483, "precision": 0.8899715094616443, "recall": 0.8897844425941457, "support": 36380.0 }, "eval_runtime": 5.0046, "eval_samples_per_second": 16.185, "eval_steps_per_second": 2.198, "eval_weighted avg": { "f1-score": 0.9207454500932067, "precision": 0.9200160099363349, "recall": 0.9228422210005498, "support": 36380.0 }, "step": 1377 }, { "epoch": 18.0, "eval_Claim": { "f1-score": 0.6748943740271293, "precision": 0.7486433152442032, "recall": 0.6143724696356275, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9180620674257648, "precision": 0.8974247053688346, "recall": 0.9396709323583181, "support": 2188.0 }, "eval_O": { "f1-score": 0.9997753145596164, "precision": 0.9998501984870047, "recall": 0.9997004418482738, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.9206027683147655, "precision": 0.898945097099017, "recall": 0.9433297691678721, "support": 15899.0 }, "eval_accuracy": 0.9191313908741067, "eval_loss": 0.6335331201553345, "eval_macro avg": { "f1-score": 0.878333631081819, "precision": 0.8862158290497649, "recall": 0.874268403252523, "support": 36380.0 }, "eval_runtime": 4.9489, "eval_samples_per_second": 16.367, "eval_steps_per_second": 2.223, "eval_weighted avg": { "f1-score": 0.9161451676737934, "precision": 0.9154807870211, "recall": 0.9191313908741067, "support": 36380.0 }, "step": 1458 }, { "epoch": 18.52, "grad_norm": 0.10011027753353119, "learning_rate": 1.2592592592592593e-05, "loss": 0.0091, "step": 1500 }, { "epoch": 19.0, "eval_Claim": { "f1-score": 0.7046035805626598, "precision": 0.7439243924392439, "recall": 0.6692307692307692, "support": 4940.0 }, "eval_MajorClaim": { "f1-score": 0.9264422045893201, "precision": 0.8727272727272727, "recall": 0.9872029250457038, "support": 2188.0 }, "eval_O": { "f1-score": 0.999213041034289, "precision": 1.0, "recall": 0.9984273197034375, "support": 13353.0 }, "eval_Premise": { "f1-score": 0.921880854252529, "precision": 0.9153078306156612, "recall": 0.9285489653437323, "support": 15899.0 }, "eval_accuracy": 0.9225123694337548, "eval_loss": 0.6328368782997131, "eval_macro avg": { "f1-score": 0.8880349201096995, "precision": 0.8829898739455444, "recall": 0.8958524948309107, "support": 36380.0 }, "eval_runtime": 4.9991, "eval_samples_per_second": 16.203, "eval_steps_per_second": 2.2, "eval_weighted avg": { "f1-score": 0.9210355324440019, "precision": 0.9205605544347316, "recall": 0.9225123694337548, "support": 36380.0 }, "step": 1539 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 2723501493480600.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }