|
{ |
|
"best_metric": 0.6115376353591917, |
|
"best_model_checkpoint": "/data2/fxu/lfqa_discourse/t5_large_finetuning_eli5_only_888/checkpoint-504", |
|
"epoch": 30.0, |
|
"global_step": 540, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_Answer": 0.0, |
|
"eval_Answer(Summary)": 0.44135188866799197, |
|
"eval_Answer-Example": 0.0, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.0, |
|
"eval_Miscellaneous": 0.0, |
|
"eval_accuracy": 0.26811594202898553, |
|
"eval_loss": 0.27198654413223267, |
|
"eval_macro_f1": 0.07355864811133199, |
|
"eval_runtime": 13.2965, |
|
"eval_samples_per_second": 4.663, |
|
"eval_steps_per_second": 0.301, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_Answer": 0.4731707317073171, |
|
"eval_Answer(Summary)": 0.4433497536945813, |
|
"eval_Answer-Example": 0.0, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.0, |
|
"eval_Miscellaneous": 0.6, |
|
"eval_accuracy": 0.4082125603864734, |
|
"eval_loss": 0.18863672018051147, |
|
"eval_macro_f1": 0.2527534142336497, |
|
"eval_runtime": 10.164, |
|
"eval_samples_per_second": 6.1, |
|
"eval_steps_per_second": 0.394, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_Answer": 0.375, |
|
"eval_Answer(Summary)": 0.49720670391061456, |
|
"eval_Answer-Example": 0.15384615384615388, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.0, |
|
"eval_Miscellaneous": 0.6923076923076923, |
|
"eval_accuracy": 0.4082125603864734, |
|
"eval_loss": 0.17385143041610718, |
|
"eval_macro_f1": 0.28639342501074344, |
|
"eval_runtime": 12.0287, |
|
"eval_samples_per_second": 5.154, |
|
"eval_steps_per_second": 0.333, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_Answer": 0.4250871080139373, |
|
"eval_Answer(Summary)": 0.4433962264150943, |
|
"eval_Answer-Example": 0.5189189189189188, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.0, |
|
"eval_Miscellaneous": 0.6842105263157895, |
|
"eval_accuracy": 0.4396135265700483, |
|
"eval_loss": 0.1758367419242859, |
|
"eval_macro_f1": 0.3452687966106233, |
|
"eval_runtime": 11.2352, |
|
"eval_samples_per_second": 5.518, |
|
"eval_steps_per_second": 0.356, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_Answer": 0.43205574912891986, |
|
"eval_Answer(Summary)": 0.4, |
|
"eval_Answer-Example": 0.5072463768115941, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.2797202797202797, |
|
"eval_Miscellaneous": 0.765432098765432, |
|
"eval_accuracy": 0.4420289855072464, |
|
"eval_loss": 0.1732899248600006, |
|
"eval_macro_f1": 0.3974090840710376, |
|
"eval_runtime": 11.2192, |
|
"eval_samples_per_second": 5.526, |
|
"eval_steps_per_second": 0.357, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_Answer": 0.475609756097561, |
|
"eval_Answer(Summary)": 0.4120603015075377, |
|
"eval_Answer-Example": 0.5222929936305732, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.02985074626865672, |
|
"eval_Miscellaneous": 0.7123287671232877, |
|
"eval_accuracy": 0.45169082125603865, |
|
"eval_loss": 0.16830046474933624, |
|
"eval_macro_f1": 0.3586904274379361, |
|
"eval_runtime": 11.055, |
|
"eval_samples_per_second": 5.608, |
|
"eval_steps_per_second": 0.362, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_Answer": 0.4397163120567376, |
|
"eval_Answer(Summary)": 0.5263157894736842, |
|
"eval_Answer-Example": 0.6206896551724138, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.25999999999999995, |
|
"eval_Miscellaneous": 0.6732673267326732, |
|
"eval_accuracy": 0.49516908212560384, |
|
"eval_loss": 0.15517334640026093, |
|
"eval_macro_f1": 0.41999818057258476, |
|
"eval_runtime": 11.346, |
|
"eval_samples_per_second": 5.464, |
|
"eval_steps_per_second": 0.353, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_Answer": 0.4920127795527157, |
|
"eval_Answer(Summary)": 0.5025641025641026, |
|
"eval_Answer-Example": 0.6495726495726496, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.32786885245901637, |
|
"eval_Miscellaneous": 0.7272727272727273, |
|
"eval_accuracy": 0.5120772946859904, |
|
"eval_loss": 0.1799505650997162, |
|
"eval_macro_f1": 0.44988185190353525, |
|
"eval_runtime": 10.9313, |
|
"eval_samples_per_second": 5.672, |
|
"eval_steps_per_second": 0.366, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_Answer": 0.5092024539877301, |
|
"eval_Answer(Summary)": 0.5235602094240838, |
|
"eval_Answer-Example": 0.5636363636363636, |
|
"eval_Answer-Organizationalsentence": 0.2857142857142857, |
|
"eval_AuxiliaryInformation": 0.22033898305084745, |
|
"eval_Miscellaneous": 0.736842105263158, |
|
"eval_accuracy": 0.4975845410628019, |
|
"eval_loss": 0.18231035768985748, |
|
"eval_macro_f1": 0.47321573351274476, |
|
"eval_runtime": 10.9336, |
|
"eval_samples_per_second": 5.671, |
|
"eval_steps_per_second": 0.366, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_Answer": 0.42704626334519574, |
|
"eval_Answer(Summary)": 0.6071428571428571, |
|
"eval_Answer-Example": 0.5161290322580646, |
|
"eval_Answer-Organizationalsentence": 0.4, |
|
"eval_AuxiliaryInformation": 0.33333333333333337, |
|
"eval_Miscellaneous": 0.765432098765432, |
|
"eval_accuracy": 0.5024154589371981, |
|
"eval_loss": 0.20293939113616943, |
|
"eval_macro_f1": 0.5081805974741471, |
|
"eval_runtime": 11.8591, |
|
"eval_samples_per_second": 5.228, |
|
"eval_steps_per_second": 0.337, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_Answer": 0.46583850931677023, |
|
"eval_Answer(Summary)": 0.5294117647058824, |
|
"eval_Answer-Example": 0.6153846153846154, |
|
"eval_Answer-Organizationalsentence": 0.4, |
|
"eval_AuxiliaryInformation": 0.28571428571428575, |
|
"eval_Miscellaneous": 0.7297297297297297, |
|
"eval_accuracy": 0.4975845410628019, |
|
"eval_loss": 0.20877273380756378, |
|
"eval_macro_f1": 0.5043464841418805, |
|
"eval_runtime": 10.9599, |
|
"eval_samples_per_second": 5.657, |
|
"eval_steps_per_second": 0.365, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_Answer": 0.42909090909090913, |
|
"eval_Answer(Summary)": 0.5714285714285714, |
|
"eval_Answer-Example": 0.6611570247933884, |
|
"eval_Answer-Organizationalsentence": 0.0, |
|
"eval_AuxiliaryInformation": 0.3364485981308411, |
|
"eval_Miscellaneous": 0.736842105263158, |
|
"eval_accuracy": 0.5193236714975845, |
|
"eval_loss": 0.22424167394638062, |
|
"eval_macro_f1": 0.45582786811781134, |
|
"eval_runtime": 11.5376, |
|
"eval_samples_per_second": 5.374, |
|
"eval_steps_per_second": 0.347, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_Answer": 0.4677966101694915, |
|
"eval_Answer(Summary)": 0.5739910313901346, |
|
"eval_Answer-Example": 0.6721311475409836, |
|
"eval_Answer-Organizationalsentence": 0.4, |
|
"eval_AuxiliaryInformation": 0.31067961165048547, |
|
"eval_Miscellaneous": 0.7749999999999999, |
|
"eval_accuracy": 0.5362318840579711, |
|
"eval_loss": 0.23804587125778198, |
|
"eval_macro_f1": 0.5332664001251824, |
|
"eval_runtime": 10.943, |
|
"eval_samples_per_second": 5.666, |
|
"eval_steps_per_second": 0.366, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_Answer": 0.4834437086092715, |
|
"eval_Answer(Summary)": 0.5517241379310346, |
|
"eval_Answer-Example": 0.6666666666666667, |
|
"eval_Answer-Organizationalsentence": 0.5, |
|
"eval_AuxiliaryInformation": 0.3709677419354839, |
|
"eval_Miscellaneous": 0.75, |
|
"eval_accuracy": 0.5314009661835749, |
|
"eval_loss": 0.2845667898654938, |
|
"eval_macro_f1": 0.5538003758570761, |
|
"eval_runtime": 11.0141, |
|
"eval_samples_per_second": 5.629, |
|
"eval_steps_per_second": 0.363, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_Answer": 0.5100671140939598, |
|
"eval_Answer(Summary)": 0.5714285714285714, |
|
"eval_Answer-Example": 0.6464646464646465, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.4153846153846154, |
|
"eval_Miscellaneous": 0.7126436781609196, |
|
"eval_accuracy": 0.5483091787439613, |
|
"eval_loss": 0.2950053811073303, |
|
"eval_macro_f1": 0.5871092153665631, |
|
"eval_runtime": 11.553, |
|
"eval_samples_per_second": 5.367, |
|
"eval_steps_per_second": 0.346, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_Answer": 0.45637583892617456, |
|
"eval_Answer(Summary)": 0.5945945945945946, |
|
"eval_Answer-Example": 0.607843137254902, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.38333333333333336, |
|
"eval_Miscellaneous": 0.7848101265822784, |
|
"eval_accuracy": 0.5314009661835749, |
|
"eval_loss": 0.28483256697654724, |
|
"eval_macro_f1": 0.5822706162263249, |
|
"eval_runtime": 10.9374, |
|
"eval_samples_per_second": 5.669, |
|
"eval_steps_per_second": 0.366, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_Answer": 0.4557823129251701, |
|
"eval_Answer(Summary)": 0.5952380952380952, |
|
"eval_Answer-Example": 0.6923076923076923, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.3125, |
|
"eval_Miscellaneous": 0.7848101265822784, |
|
"eval_accuracy": 0.5434782608695652, |
|
"eval_loss": 0.29017218947410583, |
|
"eval_macro_f1": 0.5845508156199838, |
|
"eval_runtime": 11.3062, |
|
"eval_samples_per_second": 5.484, |
|
"eval_steps_per_second": 0.354, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_Answer": 0.4621212121212122, |
|
"eval_Answer(Summary)": 0.5844748858447489, |
|
"eval_Answer-Example": 0.6611570247933884, |
|
"eval_Answer-Organizationalsentence": 0.33333333333333337, |
|
"eval_AuxiliaryInformation": 0.43283582089552236, |
|
"eval_Miscellaneous": 0.7380952380952381, |
|
"eval_accuracy": 0.5458937198067633, |
|
"eval_loss": 0.3154158294200897, |
|
"eval_macro_f1": 0.5353362525139073, |
|
"eval_runtime": 11.5419, |
|
"eval_samples_per_second": 5.372, |
|
"eval_steps_per_second": 0.347, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_Answer": 0.46905537459283386, |
|
"eval_Answer(Summary)": 0.5526315789473685, |
|
"eval_Answer-Example": 0.6315789473684211, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.38775510204081637, |
|
"eval_Miscellaneous": 0.7692307692307693, |
|
"eval_accuracy": 0.533816425120773, |
|
"eval_loss": 0.3426768183708191, |
|
"eval_macro_f1": 0.5794864064744794, |
|
"eval_runtime": 11.3895, |
|
"eval_samples_per_second": 5.444, |
|
"eval_steps_per_second": 0.351, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_Answer": 0.5089820359281436, |
|
"eval_Answer(Summary)": 0.5700934579439253, |
|
"eval_Answer-Example": 0.45161290322580644, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.3921568627450981, |
|
"eval_Miscellaneous": 0.765432098765432, |
|
"eval_accuracy": 0.5289855072463768, |
|
"eval_loss": 0.3493908941745758, |
|
"eval_macro_f1": 0.5591573375458453, |
|
"eval_runtime": 11.3644, |
|
"eval_samples_per_second": 5.456, |
|
"eval_steps_per_second": 0.352, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_Answer": 0.4316546762589928, |
|
"eval_Answer(Summary)": 0.5701754385964912, |
|
"eval_Answer-Example": 0.6666666666666667, |
|
"eval_Answer-Organizationalsentence": 0.5, |
|
"eval_AuxiliaryInformation": 0.4, |
|
"eval_Miscellaneous": 0.7749999999999999, |
|
"eval_accuracy": 0.5265700483091788, |
|
"eval_loss": 0.34431466460227966, |
|
"eval_macro_f1": 0.5572494635870251, |
|
"eval_runtime": 11.6127, |
|
"eval_samples_per_second": 5.339, |
|
"eval_steps_per_second": 0.344, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_Answer": 0.4968944099378882, |
|
"eval_Answer(Summary)": 0.5909090909090909, |
|
"eval_Answer-Example": 0.4597701149425288, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.4000000000000001, |
|
"eval_Miscellaneous": 0.7749999999999999, |
|
"eval_accuracy": 0.5314009661835749, |
|
"eval_loss": 0.3664790987968445, |
|
"eval_macro_f1": 0.5648733804093624, |
|
"eval_runtime": 11.1595, |
|
"eval_samples_per_second": 5.556, |
|
"eval_steps_per_second": 0.358, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_Answer": 0.48965517241379314, |
|
"eval_Answer(Summary)": 0.6079295154185023, |
|
"eval_Answer-Example": 0.6666666666666667, |
|
"eval_Answer-Organizationalsentence": 0.5, |
|
"eval_AuxiliaryInformation": 0.416, |
|
"eval_Miscellaneous": 0.7749999999999999, |
|
"eval_accuracy": 0.5603864734299517, |
|
"eval_loss": 0.36973538994789124, |
|
"eval_macro_f1": 0.575875225749827, |
|
"eval_runtime": 11.3197, |
|
"eval_samples_per_second": 5.477, |
|
"eval_steps_per_second": 0.353, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_Answer": 0.4774193548387097, |
|
"eval_Answer(Summary)": 0.5833333333333334, |
|
"eval_Answer-Example": 0.5217391304347826, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.40944881889763785, |
|
"eval_Miscellaneous": 0.759493670886076, |
|
"eval_accuracy": 0.5265700483091788, |
|
"eval_loss": 0.3806516230106354, |
|
"eval_macro_f1": 0.5696834958428677, |
|
"eval_runtime": 11.2648, |
|
"eval_samples_per_second": 5.504, |
|
"eval_steps_per_second": 0.355, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_Answer": 0.4871794871794871, |
|
"eval_Answer(Summary)": 0.588785046728972, |
|
"eval_Answer-Example": 0.5833333333333334, |
|
"eval_Answer-Organizationalsentence": 0.5, |
|
"eval_AuxiliaryInformation": 0.3934426229508197, |
|
"eval_Miscellaneous": 0.759493670886076, |
|
"eval_accuracy": 0.5362318840579711, |
|
"eval_loss": 0.39177072048187256, |
|
"eval_macro_f1": 0.552039026846448, |
|
"eval_runtime": 11.3644, |
|
"eval_samples_per_second": 5.456, |
|
"eval_steps_per_second": 0.352, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_Answer": 0.5116279069767442, |
|
"eval_Answer(Summary)": 0.5915492957746479, |
|
"eval_Answer-Example": 0.7102803738317757, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.4032258064516129, |
|
"eval_Miscellaneous": 0.759493670886076, |
|
"eval_accuracy": 0.5652173913043478, |
|
"eval_loss": 0.3835026025772095, |
|
"eval_macro_f1": 0.6071406200979206, |
|
"eval_runtime": 11.3582, |
|
"eval_samples_per_second": 5.459, |
|
"eval_steps_per_second": 0.352, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_Answer": 0.5100671140939598, |
|
"eval_Answer(Summary)": 0.609090909090909, |
|
"eval_Answer-Example": 0.69811320754717, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.4132231404958678, |
|
"eval_Miscellaneous": 0.759493670886076, |
|
"eval_accuracy": 0.5700483091787439, |
|
"eval_loss": 0.3833402693271637, |
|
"eval_macro_f1": 0.6094424514634416, |
|
"eval_runtime": 11.4693, |
|
"eval_samples_per_second": 5.406, |
|
"eval_steps_per_second": 0.349, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 27.78, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0945, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_Answer": 0.5167785234899329, |
|
"eval_Answer(Summary)": 0.5972850678733032, |
|
"eval_Answer-Example": 0.7222222222222223, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.4067796610169492, |
|
"eval_Miscellaneous": 0.759493670886076, |
|
"eval_accuracy": 0.572463768115942, |
|
"eval_loss": 0.39141198992729187, |
|
"eval_macro_f1": 0.6115376353591917, |
|
"eval_runtime": 11.4624, |
|
"eval_samples_per_second": 5.409, |
|
"eval_steps_per_second": 0.349, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_Answer": 0.5214521452145214, |
|
"eval_Answer(Summary)": 0.599078341013825, |
|
"eval_Answer-Example": 0.7102803738317757, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.4067796610169492, |
|
"eval_Miscellaneous": 0.759493670886076, |
|
"eval_accuracy": 0.572463768115942, |
|
"eval_loss": 0.3956356346607208, |
|
"eval_macro_f1": 0.610625143104969, |
|
"eval_runtime": 11.2747, |
|
"eval_samples_per_second": 5.499, |
|
"eval_steps_per_second": 0.355, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_Answer": 0.5197368421052633, |
|
"eval_Answer(Summary)": 0.5925925925925926, |
|
"eval_Answer-Example": 0.7102803738317757, |
|
"eval_Answer-Organizationalsentence": 0.6666666666666666, |
|
"eval_AuxiliaryInformation": 0.4067796610169492, |
|
"eval_Miscellaneous": 0.759493670886076, |
|
"eval_accuracy": 0.5700483091787439, |
|
"eval_loss": 0.39962947368621826, |
|
"eval_macro_f1": 0.6092583011832206, |
|
"eval_runtime": 11.3436, |
|
"eval_samples_per_second": 5.466, |
|
"eval_steps_per_second": 0.353, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"step": 540, |
|
"total_flos": 1.5886402423296e+16, |
|
"train_loss": 0.08773032142608254, |
|
"train_runtime": 1826.4445, |
|
"train_samples_per_second": 4.714, |
|
"train_steps_per_second": 0.296 |
|
} |
|
], |
|
"max_steps": 540, |
|
"num_train_epochs": 30, |
|
"total_flos": 1.5886402423296e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|