{ "best_metric": 0.6115376353591917, "best_model_checkpoint": "/data2/fxu/lfqa_discourse/t5_large_finetuning_eli5_only_888/checkpoint-504", "epoch": 30.0, "global_step": 540, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_Answer": 0.0, "eval_Answer(Summary)": 0.44135188866799197, "eval_Answer-Example": 0.0, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.0, "eval_Miscellaneous": 0.0, "eval_accuracy": 0.26811594202898553, "eval_loss": 0.27198654413223267, "eval_macro_f1": 0.07355864811133199, "eval_runtime": 13.2965, "eval_samples_per_second": 4.663, "eval_steps_per_second": 0.301, "step": 18 }, { "epoch": 2.0, "eval_Answer": 0.4731707317073171, "eval_Answer(Summary)": 0.4433497536945813, "eval_Answer-Example": 0.0, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.0, "eval_Miscellaneous": 0.6, "eval_accuracy": 0.4082125603864734, "eval_loss": 0.18863672018051147, "eval_macro_f1": 0.2527534142336497, "eval_runtime": 10.164, "eval_samples_per_second": 6.1, "eval_steps_per_second": 0.394, "step": 36 }, { "epoch": 3.0, "eval_Answer": 0.375, "eval_Answer(Summary)": 0.49720670391061456, "eval_Answer-Example": 0.15384615384615388, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.0, "eval_Miscellaneous": 0.6923076923076923, "eval_accuracy": 0.4082125603864734, "eval_loss": 0.17385143041610718, "eval_macro_f1": 0.28639342501074344, "eval_runtime": 12.0287, "eval_samples_per_second": 5.154, "eval_steps_per_second": 0.333, "step": 54 }, { "epoch": 4.0, "eval_Answer": 0.4250871080139373, "eval_Answer(Summary)": 0.4433962264150943, "eval_Answer-Example": 0.5189189189189188, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.0, "eval_Miscellaneous": 0.6842105263157895, "eval_accuracy": 0.4396135265700483, "eval_loss": 0.1758367419242859, "eval_macro_f1": 0.3452687966106233, "eval_runtime": 11.2352, "eval_samples_per_second": 5.518, "eval_steps_per_second": 0.356, "step": 72 }, { "epoch": 5.0, "eval_Answer": 0.43205574912891986, "eval_Answer(Summary)": 0.4, "eval_Answer-Example": 0.5072463768115941, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.2797202797202797, "eval_Miscellaneous": 0.765432098765432, "eval_accuracy": 0.4420289855072464, "eval_loss": 0.1732899248600006, "eval_macro_f1": 0.3974090840710376, "eval_runtime": 11.2192, "eval_samples_per_second": 5.526, "eval_steps_per_second": 0.357, "step": 90 }, { "epoch": 6.0, "eval_Answer": 0.475609756097561, "eval_Answer(Summary)": 0.4120603015075377, "eval_Answer-Example": 0.5222929936305732, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.02985074626865672, "eval_Miscellaneous": 0.7123287671232877, "eval_accuracy": 0.45169082125603865, "eval_loss": 0.16830046474933624, "eval_macro_f1": 0.3586904274379361, "eval_runtime": 11.055, "eval_samples_per_second": 5.608, "eval_steps_per_second": 0.362, "step": 108 }, { "epoch": 7.0, "eval_Answer": 0.4397163120567376, "eval_Answer(Summary)": 0.5263157894736842, "eval_Answer-Example": 0.6206896551724138, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.25999999999999995, "eval_Miscellaneous": 0.6732673267326732, "eval_accuracy": 0.49516908212560384, "eval_loss": 0.15517334640026093, "eval_macro_f1": 0.41999818057258476, "eval_runtime": 11.346, "eval_samples_per_second": 5.464, "eval_steps_per_second": 0.353, "step": 126 }, { "epoch": 8.0, "eval_Answer": 0.4920127795527157, "eval_Answer(Summary)": 0.5025641025641026, "eval_Answer-Example": 0.6495726495726496, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.32786885245901637, "eval_Miscellaneous": 0.7272727272727273, "eval_accuracy": 0.5120772946859904, "eval_loss": 0.1799505650997162, "eval_macro_f1": 0.44988185190353525, "eval_runtime": 10.9313, "eval_samples_per_second": 5.672, "eval_steps_per_second": 0.366, "step": 144 }, { "epoch": 9.0, "eval_Answer": 0.5092024539877301, "eval_Answer(Summary)": 0.5235602094240838, "eval_Answer-Example": 0.5636363636363636, "eval_Answer-Organizationalsentence": 0.2857142857142857, "eval_AuxiliaryInformation": 0.22033898305084745, "eval_Miscellaneous": 0.736842105263158, "eval_accuracy": 0.4975845410628019, "eval_loss": 0.18231035768985748, "eval_macro_f1": 0.47321573351274476, "eval_runtime": 10.9336, "eval_samples_per_second": 5.671, "eval_steps_per_second": 0.366, "step": 162 }, { "epoch": 10.0, "eval_Answer": 0.42704626334519574, "eval_Answer(Summary)": 0.6071428571428571, "eval_Answer-Example": 0.5161290322580646, "eval_Answer-Organizationalsentence": 0.4, "eval_AuxiliaryInformation": 0.33333333333333337, "eval_Miscellaneous": 0.765432098765432, "eval_accuracy": 0.5024154589371981, "eval_loss": 0.20293939113616943, "eval_macro_f1": 0.5081805974741471, "eval_runtime": 11.8591, "eval_samples_per_second": 5.228, "eval_steps_per_second": 0.337, "step": 180 }, { "epoch": 11.0, "eval_Answer": 0.46583850931677023, "eval_Answer(Summary)": 0.5294117647058824, "eval_Answer-Example": 0.6153846153846154, "eval_Answer-Organizationalsentence": 0.4, "eval_AuxiliaryInformation": 0.28571428571428575, "eval_Miscellaneous": 0.7297297297297297, "eval_accuracy": 0.4975845410628019, "eval_loss": 0.20877273380756378, "eval_macro_f1": 0.5043464841418805, "eval_runtime": 10.9599, "eval_samples_per_second": 5.657, "eval_steps_per_second": 0.365, "step": 198 }, { "epoch": 12.0, "eval_Answer": 0.42909090909090913, "eval_Answer(Summary)": 0.5714285714285714, "eval_Answer-Example": 0.6611570247933884, "eval_Answer-Organizationalsentence": 0.0, "eval_AuxiliaryInformation": 0.3364485981308411, "eval_Miscellaneous": 0.736842105263158, "eval_accuracy": 0.5193236714975845, "eval_loss": 0.22424167394638062, "eval_macro_f1": 0.45582786811781134, "eval_runtime": 11.5376, "eval_samples_per_second": 5.374, "eval_steps_per_second": 0.347, "step": 216 }, { "epoch": 13.0, "eval_Answer": 0.4677966101694915, "eval_Answer(Summary)": 0.5739910313901346, "eval_Answer-Example": 0.6721311475409836, "eval_Answer-Organizationalsentence": 0.4, "eval_AuxiliaryInformation": 0.31067961165048547, "eval_Miscellaneous": 0.7749999999999999, "eval_accuracy": 0.5362318840579711, "eval_loss": 0.23804587125778198, "eval_macro_f1": 0.5332664001251824, "eval_runtime": 10.943, "eval_samples_per_second": 5.666, "eval_steps_per_second": 0.366, "step": 234 }, { "epoch": 14.0, "eval_Answer": 0.4834437086092715, "eval_Answer(Summary)": 0.5517241379310346, "eval_Answer-Example": 0.6666666666666667, "eval_Answer-Organizationalsentence": 0.5, "eval_AuxiliaryInformation": 0.3709677419354839, "eval_Miscellaneous": 0.75, "eval_accuracy": 0.5314009661835749, "eval_loss": 0.2845667898654938, "eval_macro_f1": 0.5538003758570761, "eval_runtime": 11.0141, "eval_samples_per_second": 5.629, "eval_steps_per_second": 0.363, "step": 252 }, { "epoch": 15.0, "eval_Answer": 0.5100671140939598, "eval_Answer(Summary)": 0.5714285714285714, "eval_Answer-Example": 0.6464646464646465, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.4153846153846154, "eval_Miscellaneous": 0.7126436781609196, "eval_accuracy": 0.5483091787439613, "eval_loss": 0.2950053811073303, "eval_macro_f1": 0.5871092153665631, "eval_runtime": 11.553, "eval_samples_per_second": 5.367, "eval_steps_per_second": 0.346, "step": 270 }, { "epoch": 16.0, "eval_Answer": 0.45637583892617456, "eval_Answer(Summary)": 0.5945945945945946, "eval_Answer-Example": 0.607843137254902, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.38333333333333336, "eval_Miscellaneous": 0.7848101265822784, "eval_accuracy": 0.5314009661835749, "eval_loss": 0.28483256697654724, "eval_macro_f1": 0.5822706162263249, "eval_runtime": 10.9374, "eval_samples_per_second": 5.669, "eval_steps_per_second": 0.366, "step": 288 }, { "epoch": 17.0, "eval_Answer": 0.4557823129251701, "eval_Answer(Summary)": 0.5952380952380952, "eval_Answer-Example": 0.6923076923076923, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.3125, "eval_Miscellaneous": 0.7848101265822784, "eval_accuracy": 0.5434782608695652, "eval_loss": 0.29017218947410583, "eval_macro_f1": 0.5845508156199838, "eval_runtime": 11.3062, "eval_samples_per_second": 5.484, "eval_steps_per_second": 0.354, "step": 306 }, { "epoch": 18.0, "eval_Answer": 0.4621212121212122, "eval_Answer(Summary)": 0.5844748858447489, "eval_Answer-Example": 0.6611570247933884, "eval_Answer-Organizationalsentence": 0.33333333333333337, "eval_AuxiliaryInformation": 0.43283582089552236, "eval_Miscellaneous": 0.7380952380952381, "eval_accuracy": 0.5458937198067633, "eval_loss": 0.3154158294200897, "eval_macro_f1": 0.5353362525139073, "eval_runtime": 11.5419, "eval_samples_per_second": 5.372, "eval_steps_per_second": 0.347, "step": 324 }, { "epoch": 19.0, "eval_Answer": 0.46905537459283386, "eval_Answer(Summary)": 0.5526315789473685, "eval_Answer-Example": 0.6315789473684211, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.38775510204081637, "eval_Miscellaneous": 0.7692307692307693, "eval_accuracy": 0.533816425120773, "eval_loss": 0.3426768183708191, "eval_macro_f1": 0.5794864064744794, "eval_runtime": 11.3895, "eval_samples_per_second": 5.444, "eval_steps_per_second": 0.351, "step": 342 }, { "epoch": 20.0, "eval_Answer": 0.5089820359281436, "eval_Answer(Summary)": 0.5700934579439253, "eval_Answer-Example": 0.45161290322580644, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.3921568627450981, "eval_Miscellaneous": 0.765432098765432, "eval_accuracy": 0.5289855072463768, "eval_loss": 0.3493908941745758, "eval_macro_f1": 0.5591573375458453, "eval_runtime": 11.3644, "eval_samples_per_second": 5.456, "eval_steps_per_second": 0.352, "step": 360 }, { "epoch": 21.0, "eval_Answer": 0.4316546762589928, "eval_Answer(Summary)": 0.5701754385964912, "eval_Answer-Example": 0.6666666666666667, "eval_Answer-Organizationalsentence": 0.5, "eval_AuxiliaryInformation": 0.4, "eval_Miscellaneous": 0.7749999999999999, "eval_accuracy": 0.5265700483091788, "eval_loss": 0.34431466460227966, "eval_macro_f1": 0.5572494635870251, "eval_runtime": 11.6127, "eval_samples_per_second": 5.339, "eval_steps_per_second": 0.344, "step": 378 }, { "epoch": 22.0, "eval_Answer": 0.4968944099378882, "eval_Answer(Summary)": 0.5909090909090909, "eval_Answer-Example": 0.4597701149425288, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.4000000000000001, "eval_Miscellaneous": 0.7749999999999999, "eval_accuracy": 0.5314009661835749, "eval_loss": 0.3664790987968445, "eval_macro_f1": 0.5648733804093624, "eval_runtime": 11.1595, "eval_samples_per_second": 5.556, "eval_steps_per_second": 0.358, "step": 396 }, { "epoch": 23.0, "eval_Answer": 0.48965517241379314, "eval_Answer(Summary)": 0.6079295154185023, "eval_Answer-Example": 0.6666666666666667, "eval_Answer-Organizationalsentence": 0.5, "eval_AuxiliaryInformation": 0.416, "eval_Miscellaneous": 0.7749999999999999, "eval_accuracy": 0.5603864734299517, "eval_loss": 0.36973538994789124, "eval_macro_f1": 0.575875225749827, "eval_runtime": 11.3197, "eval_samples_per_second": 5.477, "eval_steps_per_second": 0.353, "step": 414 }, { "epoch": 24.0, "eval_Answer": 0.4774193548387097, "eval_Answer(Summary)": 0.5833333333333334, "eval_Answer-Example": 0.5217391304347826, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.40944881889763785, "eval_Miscellaneous": 0.759493670886076, "eval_accuracy": 0.5265700483091788, "eval_loss": 0.3806516230106354, "eval_macro_f1": 0.5696834958428677, "eval_runtime": 11.2648, "eval_samples_per_second": 5.504, "eval_steps_per_second": 0.355, "step": 432 }, { "epoch": 25.0, "eval_Answer": 0.4871794871794871, "eval_Answer(Summary)": 0.588785046728972, "eval_Answer-Example": 0.5833333333333334, "eval_Answer-Organizationalsentence": 0.5, "eval_AuxiliaryInformation": 0.3934426229508197, "eval_Miscellaneous": 0.759493670886076, "eval_accuracy": 0.5362318840579711, "eval_loss": 0.39177072048187256, "eval_macro_f1": 0.552039026846448, "eval_runtime": 11.3644, "eval_samples_per_second": 5.456, "eval_steps_per_second": 0.352, "step": 450 }, { "epoch": 26.0, "eval_Answer": 0.5116279069767442, "eval_Answer(Summary)": 0.5915492957746479, "eval_Answer-Example": 0.7102803738317757, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.4032258064516129, "eval_Miscellaneous": 0.759493670886076, "eval_accuracy": 0.5652173913043478, "eval_loss": 0.3835026025772095, "eval_macro_f1": 0.6071406200979206, "eval_runtime": 11.3582, "eval_samples_per_second": 5.459, "eval_steps_per_second": 0.352, "step": 468 }, { "epoch": 27.0, "eval_Answer": 0.5100671140939598, "eval_Answer(Summary)": 0.609090909090909, "eval_Answer-Example": 0.69811320754717, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.4132231404958678, "eval_Miscellaneous": 0.759493670886076, "eval_accuracy": 0.5700483091787439, "eval_loss": 0.3833402693271637, "eval_macro_f1": 0.6094424514634416, "eval_runtime": 11.4693, "eval_samples_per_second": 5.406, "eval_steps_per_second": 0.349, "step": 486 }, { "epoch": 27.78, "learning_rate": 7.4074074074074075e-06, "loss": 0.0945, "step": 500 }, { "epoch": 28.0, "eval_Answer": 0.5167785234899329, "eval_Answer(Summary)": 0.5972850678733032, "eval_Answer-Example": 0.7222222222222223, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.4067796610169492, "eval_Miscellaneous": 0.759493670886076, "eval_accuracy": 0.572463768115942, "eval_loss": 0.39141198992729187, "eval_macro_f1": 0.6115376353591917, "eval_runtime": 11.4624, "eval_samples_per_second": 5.409, "eval_steps_per_second": 0.349, "step": 504 }, { "epoch": 29.0, "eval_Answer": 0.5214521452145214, "eval_Answer(Summary)": 0.599078341013825, "eval_Answer-Example": 0.7102803738317757, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.4067796610169492, "eval_Miscellaneous": 0.759493670886076, "eval_accuracy": 0.572463768115942, "eval_loss": 0.3956356346607208, "eval_macro_f1": 0.610625143104969, "eval_runtime": 11.2747, "eval_samples_per_second": 5.499, "eval_steps_per_second": 0.355, "step": 522 }, { "epoch": 30.0, "eval_Answer": 0.5197368421052633, "eval_Answer(Summary)": 0.5925925925925926, "eval_Answer-Example": 0.7102803738317757, "eval_Answer-Organizationalsentence": 0.6666666666666666, "eval_AuxiliaryInformation": 0.4067796610169492, "eval_Miscellaneous": 0.759493670886076, "eval_accuracy": 0.5700483091787439, "eval_loss": 0.39962947368621826, "eval_macro_f1": 0.6092583011832206, "eval_runtime": 11.3436, "eval_samples_per_second": 5.466, "eval_steps_per_second": 0.353, "step": 540 }, { "epoch": 30.0, "step": 540, "total_flos": 1.5886402423296e+16, "train_loss": 0.08773032142608254, "train_runtime": 1826.4445, "train_samples_per_second": 4.714, "train_steps_per_second": 0.296 } ], "max_steps": 540, "num_train_epochs": 30, "total_flos": 1.5886402423296e+16, "trial_name": null, "trial_params": null }