diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,11533 @@ +{ + "best_metric": 43.6158, + "best_model_checkpoint": "/output/checkpoint-7500", + "epoch": 2.0, + "global_step": 8748, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 4.0781, + "step": 1 + }, + { + "epoch": 0.0, + "eval_exact_match": 4.3577, + "eval_exact_match_for_answerability_classification": 4.1538, + "eval_exact_match_for_cause_effect_classification": 6.7143, + "eval_exact_match_for_coreference_resolution": 6.1429, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 1.0, + "eval_exact_match_for_grammar_error_correction": 0.0, + "eval_exact_match_for_keyword_tagging": 3.4, + "eval_exact_match_for_overlap_extraction": 1.0, + "eval_exact_match_for_question_rewriting": 0.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 14.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 2.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 1.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 2.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 0.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 1.0, + "eval_exact_match_for_task1153_bard_word_analogy": 11.0, + "eval_exact_match_for_task1154_bard_word_analogy": 10.0, + "eval_exact_match_for_task1155_bard_word_analogy": 45.0, + "eval_exact_match_for_task1156_bard_word_analogy": 22.0, + "eval_exact_match_for_task1157_bard_word_analogy": 1.0, + "eval_exact_match_for_task1158_bard_word_analogy": 0.0, + "eval_exact_match_for_task1159_bard_word_analogy": 0.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 0.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 0.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 0.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 0.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 0.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 42.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 46.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 0.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 0.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 0.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 31.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 0.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 0.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 43.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 0.0, + "eval_exact_match_for_task219_rocstories_title_generation": 0.0, + "eval_exact_match_for_task220_rocstories_title_generation": 0.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 0.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 0.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 1.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 40.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 0.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 1.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 0.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 1.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 0.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 0.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 0.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 0.0, + "eval_exact_match_for_task613_liar_keyword_tagging": 6.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 0.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 0.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 10.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 1.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 0.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 1.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 1.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 0.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 7.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 2.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 0.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 11.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 2.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 0.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 0.0, + "eval_exact_match_for_textual_entailment": 8.875, + "eval_exact_match_for_title_generation": 0.1121, + "eval_exact_match_for_word_analogy": 11.25, + "eval_f1": 16.0863, + "eval_f1_for_answerability_classification": 6.7345, + "eval_f1_for_cause_effect_classification": 22.2334, + "eval_f1_for_coreference_resolution": 12.3166, + "eval_f1_for_data_to_text": 27.221, + "eval_f1_for_dialogue_act_recognition": 2.6924, + "eval_f1_for_grammar_error_correction": 24.755, + "eval_f1_for_keyword_tagging": 16.1097, + "eval_f1_for_overlap_extraction": 15.4014, + "eval_f1_for_question_rewriting": 36.2278, + "eval_f1_for_task020_mctaco_answerability_classification": 14.9454, + "eval_f1_for_task033_winogrande_coreference_resolution": 2.0, + "eval_f1_for_task034_winogrande_question_rewriting": 40.1765, + "eval_f1_for_task035_winogrande_question_rewriting": 27.933, + "eval_f1_for_task036_qasc_keyword_tagging": 33.8152, + "eval_f1_for_task039_qasc_overlap_extraction": 5.032, + "eval_f1_for_task050_multirc_answerability_classification": 1.2594, + "eval_f1_for_task102_commongen_data_to_text": 22.2161, + "eval_f1_for_task1152_bard_word_analogy": 1.4439, + "eval_f1_for_task1153_bard_word_analogy": 14.9324, + "eval_f1_for_task1154_bard_word_analogy": 12.0719, + "eval_f1_for_task1155_bard_word_analogy": 46.6529, + "eval_f1_for_task1156_bard_word_analogy": 23.3621, + "eval_f1_for_task1157_bard_word_analogy": 3.8698, + "eval_f1_for_task1158_bard_word_analogy": 1.9822, + "eval_f1_for_task1159_bard_word_analogy": 2.31, + "eval_f1_for_task1161_coda_19_title_generation": 14.0353, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 48.8754, + "eval_f1_for_task121_zest_question_rewriting": 34.3052, + "eval_f1_for_task133_winowhy_coreference_resolution": 17.2727, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 3.3077, + "eval_f1_for_task1344_rte_textual_entailment": 4.2733, + "eval_f1_for_task1345_qqp_question_rewriting": 22.0552, + "eval_f1_for_task1356_xlsum_title_generation": 5.5499, + "eval_f1_for_task1358_xlsum_title_generation": 18.815, + "eval_f1_for_task1385_anli_textual_entailment": 3.4677, + "eval_f1_for_task1386_anli_textual_entailment": 3.914, + "eval_f1_for_task1387_anli_textual_entailment": 2.9187, + "eval_f1_for_task1388_cb_textual_entailment": 6.6485, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 11.7874, + "eval_f1_for_task1391_winogrande_coreference_resolution": 42.3485, + "eval_f1_for_task1393_copa_cause_effect_classification": 46.2503, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 0.0519, + "eval_f1_for_task1407_dart_data_to_text": 0.1538, + "eval_f1_for_task1409_dart_data_to_text": 23.6453, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 9.7281, + "eval_f1_for_task1439_doqa_answerability_classification": 0.3814, + "eval_f1_for_task1442_doqa_answerability_classification": 0.1724, + "eval_f1_for_task1516_imppres_textual_entailment": 2.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 6.1111, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 0.2456, + "eval_f1_for_task1540_peer_read_title_generation": 8.2109, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 39.7819, + "eval_f1_for_task1562_zest_question_rewriting": 32.1105, + "eval_f1_for_task1586_scifact_title_generation": 19.1731, + "eval_f1_for_task1598_nyc_data_to_text": 34.1828, + "eval_f1_for_task1612_sick_textual_entailment": 18.8571, + "eval_f1_for_task1615_sick_textual_entailment": 31.1113, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 44.6732, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 0.1379, + "eval_f1_for_task1631_open_pi_data_to_text": 70.3762, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 0.6383, + "eval_f1_for_task1659_billsum_title_generation": 17.4135, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 31.5952, + "eval_f1_for_task1728_web_nlg_data_to_text": 24.0147, + "eval_f1_for_task190_snli_textual_entailment": 43.6667, + "eval_f1_for_task199_multinli_textual_entailment": 0.595, + "eval_f1_for_task200_multinli_textual_entailment": 2.5694, + "eval_f1_for_task201_multinli_textual_entailment": 2.7358, + "eval_f1_for_task202_multinli_textual_entailment": 2.4762, + "eval_f1_for_task219_rocstories_title_generation": 8.6343, + "eval_f1_for_task220_rocstories_title_generation": 2.6853, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.3718, + "eval_f1_for_task232_iirc_answerability_classification": 1.7286, + "eval_f1_for_task233_iirc_answerability_classification": 1.1419, + "eval_f1_for_task242_tweetqa_answerability_classification": 0.2, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 5.3045, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 25.7708, + "eval_f1_for_task288_gigaword_title_generation": 10.3122, + "eval_f1_for_task290_tellmewhy_answerability_classification": 64.9757, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 0.7642, + "eval_f1_for_task329_gap_coreference_resolution": 27.4224, + "eval_f1_for_task330_gap_coreference_resolution": 4.3515, + "eval_f1_for_task349_squad2.0_answerability_classification": 0.7412, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 4.8153, + "eval_f1_for_task391_cod3s_cause_effect_classification": 13.1287, + "eval_f1_for_task392_cod3s_cause_effect_classification": 19.6947, + "eval_f1_for_task393_cod3s_cause_effect_classification": 16.5305, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 1.6049, + "eval_f1_for_task402_grailqa_question_rewriting": 17.9211, + "eval_f1_for_task418_persent_title_generation": 6.0598, + "eval_f1_for_task442_com_qa_question_rewriting": 45.098, + "eval_f1_for_task500_scruples_title_generation": 5.1492, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 27.4294, + "eval_f1_for_task520_aquamuse_answerability_classification": 0.8546, + "eval_f1_for_task569_recipe_nlg_title_generation": 2.8296, + "eval_f1_for_task602_wikitext_title_generation": 1.1955, + "eval_f1_for_task613_liar_keyword_tagging": 6.8112, + "eval_f1_for_task614_glucose_cause_effect_classification": 32.8577, + "eval_f1_for_task619_ohsumed_title_generation": 21.1067, + "eval_f1_for_task620_ohsumed_keyword_tagging": 2.968, + "eval_f1_for_task623_ohsumed_keyword_tagging": 1.9001, + "eval_f1_for_task640_e_snli_textual_entailment": 35.5317, + "eval_f1_for_task641_e_snli_textual_entailment": 6.6143, + "eval_f1_for_task642_e_snli_textual_entailment": 3.0442, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 35.0541, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 12.9399, + "eval_f1_for_task670_ambigqa_question_rewriting": 49.4624, + "eval_f1_for_task671_ambigqa_question_rewriting": 35.8955, + "eval_f1_for_task677_ollie_data_to_text": 10.2406, + "eval_f1_for_task738_perspectrum_textual_entailment": 26.0, + "eval_f1_for_task743_eurlex_title_generation": 16.796, + "eval_f1_for_task760_msr_sqa_data_to_text": 6.7381, + "eval_f1_for_task769_qed_title_generation": 5.4319, + "eval_f1_for_task827_copa_cause_effect_classification": 3.5314, + "eval_f1_for_task828_copa_cause_effect_classification": 23.6405, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 7.1164, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 0.5062, + "eval_f1_for_task890_gwsd_textual_entailment": 4.8759, + "eval_f1_for_task891_gap_coreference_resolution": 1.8058, + "eval_f1_for_task892_gap_coreference_resolution": 1.4478, + "eval_f1_for_task893_gap_coreference_resolution": 11.7879, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 28.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 34.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_f1_for_task957_e2e_data_to_text": 38.2635, + "eval_f1_for_task970_sherliic_textual_entailment": 0.0, + "eval_f1_for_textual_entailment": 16.5264, + "eval_f1_for_title_generation": 10.8713, + "eval_f1_for_word_analogy": 13.3282, + "eval_gen_len": 67.4907, + "eval_global_step": 1, + "eval_loss": 5.825923442840576, + "eval_rouge1": 17.1556, + "eval_rouge1_for_answerability_classification": 6.9612, + "eval_rouge1_for_cause_effect_classification": 23.2619, + "eval_rouge1_for_coreference_resolution": 12.2321, + "eval_rouge1_for_data_to_text": 28.9788, + "eval_rouge1_for_dialogue_act_recognition": 2.9675, + "eval_rouge1_for_grammar_error_correction": 27.4192, + "eval_rouge1_for_keyword_tagging": 19.1413, + "eval_rouge1_for_overlap_extraction": 15.9166, + "eval_rouge1_for_question_rewriting": 37.8011, + "eval_rouge1_for_task020_mctaco_answerability_classification": 14.9253, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 2.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 40.18, + "eval_rouge1_for_task035_winogrande_question_rewriting": 28.5684, + "eval_rouge1_for_task036_qasc_keyword_tagging": 38.4105, + "eval_rouge1_for_task039_qasc_overlap_extraction": 5.5648, + "eval_rouge1_for_task050_multirc_answerability_classification": 1.239, + "eval_rouge1_for_task102_commongen_data_to_text": 25.1556, + "eval_rouge1_for_task1152_bard_word_analogy": 1.4439, + "eval_rouge1_for_task1153_bard_word_analogy": 14.9324, + "eval_rouge1_for_task1154_bard_word_analogy": 12.0719, + "eval_rouge1_for_task1155_bard_word_analogy": 46.6529, + "eval_rouge1_for_task1156_bard_word_analogy": 23.3621, + "eval_rouge1_for_task1157_bard_word_analogy": 3.8698, + "eval_rouge1_for_task1158_bard_word_analogy": 1.9822, + "eval_rouge1_for_task1159_bard_word_analogy": 2.31, + "eval_rouge1_for_task1161_coda_19_title_generation": 15.6173, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 50.5973, + "eval_rouge1_for_task121_zest_question_rewriting": 35.8984, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 17.2552, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 3.4767, + "eval_rouge1_for_task1344_rte_textual_entailment": 4.2451, + "eval_rouge1_for_task1345_qqp_question_rewriting": 25.3715, + "eval_rouge1_for_task1356_xlsum_title_generation": 6.7252, + "eval_rouge1_for_task1358_xlsum_title_generation": 22.6166, + "eval_rouge1_for_task1385_anli_textual_entailment": 3.3944, + "eval_rouge1_for_task1386_anli_textual_entailment": 3.8898, + "eval_rouge1_for_task1387_anli_textual_entailment": 2.8804, + "eval_rouge1_for_task1388_cb_textual_entailment": 6.5454, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 10.5202, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 42.3485, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 46.2503, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 1.7699, + "eval_rouge1_for_task1407_dart_data_to_text": 0.1333, + "eval_rouge1_for_task1409_dart_data_to_text": 26.5262, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 11.2941, + "eval_rouge1_for_task1439_doqa_answerability_classification": 0.3518, + "eval_rouge1_for_task1442_doqa_answerability_classification": 0.1588, + "eval_rouge1_for_task1516_imppres_textual_entailment": 2.1429, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 6.1111, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 0.2411, + "eval_rouge1_for_task1540_peer_read_title_generation": 9.8643, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 43.5442, + "eval_rouge1_for_task1562_zest_question_rewriting": 33.4326, + "eval_rouge1_for_task1586_scifact_title_generation": 21.5836, + "eval_rouge1_for_task1598_nyc_data_to_text": 35.0782, + "eval_rouge1_for_task1612_sick_textual_entailment": 25.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 69.7486, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 46.6024, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 0.1375, + "eval_rouge1_for_task1631_open_pi_data_to_text": 71.5652, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 0.6115, + "eval_rouge1_for_task1659_billsum_title_generation": 18.3944, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 31.5952, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 29.0585, + "eval_rouge1_for_task190_snli_textual_entailment": 43.6667, + "eval_rouge1_for_task199_multinli_textual_entailment": 0.5436, + "eval_rouge1_for_task200_multinli_textual_entailment": 2.4098, + "eval_rouge1_for_task201_multinli_textual_entailment": 2.5034, + "eval_rouge1_for_task202_multinli_textual_entailment": 2.299, + "eval_rouge1_for_task219_rocstories_title_generation": 9.5974, + "eval_rouge1_for_task220_rocstories_title_generation": 2.6692, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.3693, + "eval_rouge1_for_task232_iirc_answerability_classification": 1.7291, + "eval_rouge1_for_task233_iirc_answerability_classification": 1.111, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 0.2, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 5.169, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 26.2685, + "eval_rouge1_for_task288_gigaword_title_generation": 11.6838, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 68.0757, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 0.7768, + "eval_rouge1_for_task329_gap_coreference_resolution": 27.4194, + "eval_rouge1_for_task330_gap_coreference_resolution": 4.4484, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 0.739, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 4.4862, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 13.1287, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 19.6815, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 16.7856, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 1.622, + "eval_rouge1_for_task402_grailqa_question_rewriting": 18.5982, + "eval_rouge1_for_task418_persent_title_generation": 7.1168, + "eval_rouge1_for_task442_com_qa_question_rewriting": 48.8338, + "eval_rouge1_for_task500_scruples_title_generation": 5.6212, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 27.7634, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 0.8474, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 3.3803, + "eval_rouge1_for_task602_wikitext_title_generation": 1.3429, + "eval_rouge1_for_task613_liar_keyword_tagging": 16.3735, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 40.0469, + "eval_rouge1_for_task619_ohsumed_title_generation": 22.7517, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 3.3574, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 1.4981, + "eval_rouge1_for_task640_e_snli_textual_entailment": 35.5317, + "eval_rouge1_for_task641_e_snli_textual_entailment": 6.6143, + "eval_rouge1_for_task642_e_snli_textual_entailment": 3.0442, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 36.0673, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 13.0482, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 50.763, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 36.9663, + "eval_rouge1_for_task677_ollie_data_to_text": 11.266, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 26.0, + "eval_rouge1_for_task743_eurlex_title_generation": 17.5519, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 7.197, + "eval_rouge1_for_task769_qed_title_generation": 5.4913, + "eval_rouge1_for_task827_copa_cause_effect_classification": 3.3003, + "eval_rouge1_for_task828_copa_cause_effect_classification": 23.6405, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 7.1164, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 1.0476, + "eval_rouge1_for_task890_gwsd_textual_entailment": 4.8751, + "eval_rouge1_for_task891_gap_coreference_resolution": 1.8544, + "eval_rouge1_for_task892_gap_coreference_resolution": 1.4174, + "eval_rouge1_for_task893_gap_coreference_resolution": 11.775, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 28.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 34.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_rouge1_for_task957_e2e_data_to_text": 38.7107, + "eval_rouge1_for_task970_sherliic_textual_entailment": 0.0, + "eval_rouge1_for_textual_entailment": 18.3611, + "eval_rouge1_for_title_generation": 11.9413, + "eval_rouge1_for_word_analogy": 13.3282, + "eval_rougeL": 16.264, + "eval_rougeL_for_answerability_classification": 6.9612, + "eval_rougeL_for_cause_effect_classification": 22.5403, + "eval_rougeL_for_coreference_resolution": 12.2063, + "eval_rougeL_for_data_to_text": 24.8131, + "eval_rougeL_for_dialogue_act_recognition": 2.9675, + "eval_rougeL_for_grammar_error_correction": 25.7517, + "eval_rougeL_for_keyword_tagging": 18.6803, + "eval_rougeL_for_overlap_extraction": 14.9766, + "eval_rougeL_for_question_rewriting": 34.8206, + "eval_rougeL_for_task020_mctaco_answerability_classification": 14.9253, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 2.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 39.9943, + "eval_rougeL_for_task035_winogrande_question_rewriting": 25.4934, + "eval_rougeL_for_task036_qasc_keyword_tagging": 36.8414, + "eval_rougeL_for_task039_qasc_overlap_extraction": 5.5648, + "eval_rougeL_for_task050_multirc_answerability_classification": 1.239, + "eval_rougeL_for_task102_commongen_data_to_text": 22.9528, + "eval_rougeL_for_task1152_bard_word_analogy": 1.4439, + "eval_rougeL_for_task1153_bard_word_analogy": 14.9324, + "eval_rougeL_for_task1154_bard_word_analogy": 12.0719, + "eval_rougeL_for_task1155_bard_word_analogy": 46.6529, + "eval_rougeL_for_task1156_bard_word_analogy": 23.3621, + "eval_rougeL_for_task1157_bard_word_analogy": 3.8698, + "eval_rougeL_for_task1158_bard_word_analogy": 1.9822, + "eval_rougeL_for_task1159_bard_word_analogy": 2.31, + "eval_rougeL_for_task1161_coda_19_title_generation": 11.999, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 49.1978, + "eval_rougeL_for_task121_zest_question_rewriting": 30.6903, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 17.2552, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 3.3901, + "eval_rougeL_for_task1344_rte_textual_entailment": 4.2451, + "eval_rougeL_for_task1345_qqp_question_rewriting": 21.7993, + "eval_rougeL_for_task1356_xlsum_title_generation": 5.3975, + "eval_rougeL_for_task1358_xlsum_title_generation": 19.5501, + "eval_rougeL_for_task1385_anli_textual_entailment": 3.3944, + "eval_rougeL_for_task1386_anli_textual_entailment": 3.8898, + "eval_rougeL_for_task1387_anli_textual_entailment": 2.8804, + "eval_rougeL_for_task1388_cb_textual_entailment": 6.5454, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 10.5202, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 42.3485, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 46.2503, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 1.7699, + "eval_rougeL_for_task1407_dart_data_to_text": 0.1333, + "eval_rougeL_for_task1409_dart_data_to_text": 24.1715, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 9.5332, + "eval_rougeL_for_task1439_doqa_answerability_classification": 0.3518, + "eval_rougeL_for_task1442_doqa_answerability_classification": 0.1588, + "eval_rougeL_for_task1516_imppres_textual_entailment": 2.1429, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 6.1111, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 0.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 0.2411, + "eval_rougeL_for_task1540_peer_read_title_generation": 9.0499, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 41.9702, + "eval_rougeL_for_task1562_zest_question_rewriting": 29.8385, + "eval_rougeL_for_task1586_scifact_title_generation": 17.7013, + "eval_rougeL_for_task1598_nyc_data_to_text": 24.0093, + "eval_rougeL_for_task1612_sick_textual_entailment": 25.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 69.7486, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 44.1437, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 0.1375, + "eval_rougeL_for_task1631_open_pi_data_to_text": 68.3006, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 0.6115, + "eval_rougeL_for_task1659_billsum_title_generation": 14.8126, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 31.5952, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 24.216, + "eval_rougeL_for_task190_snli_textual_entailment": 43.6667, + "eval_rougeL_for_task199_multinli_textual_entailment": 0.5436, + "eval_rougeL_for_task200_multinli_textual_entailment": 2.4098, + "eval_rougeL_for_task201_multinli_textual_entailment": 2.5034, + "eval_rougeL_for_task202_multinli_textual_entailment": 2.299, + "eval_rougeL_for_task219_rocstories_title_generation": 9.4597, + "eval_rougeL_for_task220_rocstories_title_generation": 2.6692, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 0.3693, + "eval_rougeL_for_task232_iirc_answerability_classification": 1.7291, + "eval_rougeL_for_task233_iirc_answerability_classification": 1.111, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 0.2, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 5.1208, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 24.3884, + "eval_rougeL_for_task288_gigaword_title_generation": 10.3299, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 68.0757, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 0.7768, + "eval_rougeL_for_task329_gap_coreference_resolution": 27.4194, + "eval_rougeL_for_task330_gap_coreference_resolution": 4.4484, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 0.739, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 4.4862, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 13.1287, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 19.6815, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 16.1085, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 1.622, + "eval_rougeL_for_task402_grailqa_question_rewriting": 15.9932, + "eval_rougeL_for_task418_persent_title_generation": 6.1388, + "eval_rougeL_for_task442_com_qa_question_rewriting": 45.1043, + "eval_rougeL_for_task500_scruples_title_generation": 4.6551, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 27.1822, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 0.8474, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 3.0879, + "eval_rougeL_for_task602_wikitext_title_generation": 1.3429, + "eval_rougeL_for_task613_liar_keyword_tagging": 16.3735, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 35.6727, + "eval_rougeL_for_task619_ohsumed_title_generation": 20.7225, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 3.1546, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 1.4981, + "eval_rougeL_for_task640_e_snli_textual_entailment": 35.5317, + "eval_rougeL_for_task641_e_snli_textual_entailment": 6.6143, + "eval_rougeL_for_task642_e_snli_textual_entailment": 3.0442, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 35.534, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 12.7354, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 46.6034, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 34.1685, + "eval_rougeL_for_task677_ollie_data_to_text": 10.6139, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 26.0, + "eval_rougeL_for_task743_eurlex_title_generation": 14.2451, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.5832, + "eval_rougeL_for_task769_qed_title_generation": 5.4458, + "eval_rougeL_for_task827_copa_cause_effect_classification": 3.3003, + "eval_rougeL_for_task828_copa_cause_effect_classification": 23.6405, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 7.1164, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 1.0476, + "eval_rougeL_for_task890_gwsd_textual_entailment": 4.8751, + "eval_rougeL_for_task891_gap_coreference_resolution": 1.8544, + "eval_rougeL_for_task892_gap_coreference_resolution": 1.4174, + "eval_rougeL_for_task893_gap_coreference_resolution": 11.775, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 28.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 34.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 33.3333, + "eval_rougeL_for_task957_e2e_data_to_text": 29.1074, + "eval_rougeL_for_task970_sherliic_textual_entailment": 0.0, + "eval_rougeL_for_textual_entailment": 18.3611, + "eval_rougeL_for_title_generation": 10.4801, + "eval_rougeL_for_word_analogy": 13.3282, + "eval_runtime": 1291.9813, + "eval_samples_per_second": 9.218, + "eval_steps_per_second": 0.289, + "step": 1 + }, + { + "epoch": 0.01, + "learning_rate": 5e-05, + "loss": 2.783, + "step": 50 + }, + { + "epoch": 0.01, + "eval_exact_match": 24.2821, + "eval_exact_match_for_answerability_classification": 49.6154, + "eval_exact_match_for_cause_effect_classification": 35.8571, + "eval_exact_match_for_coreference_resolution": 30.2857, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 24.8571, + "eval_exact_match_for_grammar_error_correction": 1.5, + "eval_exact_match_for_keyword_tagging": 28.6, + "eval_exact_match_for_overlap_extraction": 7.0, + "eval_exact_match_for_question_rewriting": 1.5455, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 23.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 6.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 14.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 12.0, + "eval_exact_match_for_task1153_bard_word_analogy": 4.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 30.0, + "eval_exact_match_for_task1157_bard_word_analogy": 27.0, + "eval_exact_match_for_task1158_bard_word_analogy": 13.0, + "eval_exact_match_for_task1159_bard_word_analogy": 10.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 47.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 3.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 43.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 2.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 8.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 39.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 28.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 9.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 34.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 0.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 5.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 9.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 11.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 68.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 16.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 51.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 46.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 72.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 42.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 20.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.5417, + "eval_exact_match_for_title_generation": 5.9978, + "eval_exact_match_for_word_analogy": 20.125, + "eval_f1": 39.3785, + "eval_f1_for_answerability_classification": 52.1795, + "eval_f1_for_cause_effect_classification": 52.9226, + "eval_f1_for_coreference_resolution": 40.5717, + "eval_f1_for_data_to_text": 37.052, + "eval_f1_for_dialogue_act_recognition": 34.381, + "eval_f1_for_grammar_error_correction": 39.7161, + "eval_f1_for_keyword_tagging": 42.9843, + "eval_f1_for_overlap_extraction": 26.2416, + "eval_f1_for_question_rewriting": 60.9978, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 23.525, + "eval_f1_for_task034_winogrande_question_rewriting": 81.3093, + "eval_f1_for_task035_winogrande_question_rewriting": 69.5835, + "eval_f1_for_task036_qasc_keyword_tagging": 47.7306, + "eval_f1_for_task039_qasc_overlap_extraction": 18.0342, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 41.9174, + "eval_f1_for_task1152_bard_word_analogy": 12.0, + "eval_f1_for_task1153_bard_word_analogy": 7.3333, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 30.0, + "eval_f1_for_task1157_bard_word_analogy": 27.0, + "eval_f1_for_task1158_bard_word_analogy": 13.0, + "eval_f1_for_task1159_bard_word_analogy": 10.0, + "eval_f1_for_task1161_coda_19_title_generation": 17.2518, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 73.1456, + "eval_f1_for_task121_zest_question_rewriting": 43.9032, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 0.95, + "eval_f1_for_task1344_rte_textual_entailment": 51.0, + "eval_f1_for_task1345_qqp_question_rewriting": 34.8199, + "eval_f1_for_task1356_xlsum_title_generation": 5.6258, + "eval_f1_for_task1358_xlsum_title_generation": 25.5619, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 6.5993, + "eval_f1_for_task1409_dart_data_to_text": 37.9521, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 20.2802, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 3.0, + "eval_f1_for_task1540_peer_read_title_generation": 17.6508, + "eval_f1_for_task1554_scitail_textual_entailment": 47.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 59.152, + "eval_f1_for_task1562_zest_question_rewriting": 41.7134, + "eval_f1_for_task1586_scifact_title_generation": 21.9768, + "eval_f1_for_task1598_nyc_data_to_text": 34.3603, + "eval_f1_for_task1612_sick_textual_entailment": 43.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 70.0052, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_f1_for_task1631_open_pi_data_to_text": 74.1559, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 29.7506, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.3413, + "eval_f1_for_task1728_web_nlg_data_to_text": 36.7056, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 8.0, + "eval_f1_for_task219_rocstories_title_generation": 16.3591, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 39.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 38.119, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 34.449, + "eval_f1_for_task288_gigaword_title_generation": 23.8495, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 50.1714, + "eval_f1_for_task329_gap_coreference_resolution": 34.0, + "eval_f1_for_task330_gap_coreference_resolution": 42.5333, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 24.9076, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 7.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 75.5655, + "eval_f1_for_task418_persent_title_generation": 15.7701, + "eval_f1_for_task442_com_qa_question_rewriting": 57.752, + "eval_f1_for_task500_scruples_title_generation": 9.6406, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 33.6688, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 8.5883, + "eval_f1_for_task602_wikitext_title_generation": 4.963, + "eval_f1_for_task613_liar_keyword_tagging": 10.4762, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.8838, + "eval_f1_for_task619_ohsumed_title_generation": 26.4815, + "eval_f1_for_task620_ohsumed_keyword_tagging": 24.6539, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 83.0608, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 41.5281, + "eval_f1_for_task670_ambigqa_question_rewriting": 68.0051, + "eval_f1_for_task671_ambigqa_question_rewriting": 55.1735, + "eval_f1_for_task677_ollie_data_to_text": 23.9318, + "eval_f1_for_task738_perspectrum_textual_entailment": 51.0, + "eval_f1_for_task743_eurlex_title_generation": 21.1665, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.6557, + "eval_f1_for_task769_qed_title_generation": 66.8615, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 51.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 72.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 42.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.119, + "eval_f1_for_task892_gap_coreference_resolution": 48.0, + "eval_f1_for_task893_gap_coreference_resolution": 20.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 49.4767, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.5417, + "eval_f1_for_title_generation": 22.1593, + "eval_f1_for_word_analogy": 20.5417, + "eval_gen_len": 10.8253, + "eval_global_step": 50, + "eval_loss": 1.7051361799240112, + "eval_rouge1": 40.7946, + "eval_rouge1_for_answerability_classification": 52.1795, + "eval_rouge1_for_cause_effect_classification": 53.7249, + "eval_rouge1_for_coreference_resolution": 41.0658, + "eval_rouge1_for_data_to_text": 39.8987, + "eval_rouge1_for_dialogue_act_recognition": 36.3571, + "eval_rouge1_for_grammar_error_correction": 43.6967, + "eval_rouge1_for_keyword_tagging": 45.4186, + "eval_rouge1_for_overlap_extraction": 27.4984, + "eval_rouge1_for_question_rewriting": 62.8363, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 26.525, + "eval_rouge1_for_task034_winogrande_question_rewriting": 81.3026, + "eval_rouge1_for_task035_winogrande_question_rewriting": 71.3338, + "eval_rouge1_for_task036_qasc_keyword_tagging": 50.8832, + "eval_rouge1_for_task039_qasc_overlap_extraction": 19.0342, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 52.2655, + "eval_rouge1_for_task1152_bard_word_analogy": 12.0, + "eval_rouge1_for_task1153_bard_word_analogy": 7.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 30.0, + "eval_rouge1_for_task1157_bard_word_analogy": 27.0, + "eval_rouge1_for_task1158_bard_word_analogy": 13.0, + "eval_rouge1_for_task1159_bard_word_analogy": 10.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 19.8824, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 74.5073, + "eval_rouge1_for_task121_zest_question_rewriting": 46.7294, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 0.9222, + "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 38.1145, + "eval_rouge1_for_task1356_xlsum_title_generation": 7.0967, + "eval_rouge1_for_task1358_xlsum_title_generation": 29.629, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.5, + "eval_rouge1_for_task1407_dart_data_to_text": 6.581, + "eval_rouge1_for_task1409_dart_data_to_text": 38.3018, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 25.2738, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 3.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 21.0107, + "eval_rouge1_for_task1554_scitail_textual_entailment": 47.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 62.1196, + "eval_rouge1_for_task1562_zest_question_rewriting": 44.4079, + "eval_rouge1_for_task1586_scifact_title_generation": 24.6279, + "eval_rouge1_for_task1598_nyc_data_to_text": 36.7917, + "eval_rouge1_for_task1612_sick_textual_entailment": 43.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 70.7124, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 74.8641, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 29.8637, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.3413, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 43.6242, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 8.0, + "eval_rouge1_for_task219_rocstories_title_generation": 22.5114, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 39.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 39.3643, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 35.9626, + "eval_rouge1_for_task288_gigaword_title_generation": 26.3969, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 51.469, + "eval_rouge1_for_task329_gap_coreference_resolution": 34.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 42.5333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 25.4955, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 9.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 77.2563, + "eval_rouge1_for_task418_persent_title_generation": 18.156, + "eval_rouge1_for_task442_com_qa_question_rewriting": 61.4784, + "eval_rouge1_for_task500_scruples_title_generation": 10.4518, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 34.0921, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 9.7201, + "eval_rouge1_for_task602_wikitext_title_generation": 5.2483, + "eval_rouge1_for_task613_liar_keyword_tagging": 14.2095, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 32.9121, + "eval_rouge1_for_task619_ohsumed_title_generation": 29.5715, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 28.6598, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 84.3404, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 40.8838, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 69.0715, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 56.2854, + "eval_rouge1_for_task677_ollie_data_to_text": 25.9481, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 51.0, + "eval_rouge1_for_task743_eurlex_title_generation": 21.5965, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.7502, + "eval_rouge1_for_task769_qed_title_generation": 67.2928, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 72.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 42.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 52.9714, + "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 20.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.2116, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.375, + "eval_rouge1_for_title_generation": 23.9479, + "eval_rouge1_for_word_analogy": 20.5417, + "eval_rougeL": 39.686, + "eval_rougeL_for_answerability_classification": 52.1795, + "eval_rougeL_for_cause_effect_classification": 53.2421, + "eval_rougeL_for_coreference_resolution": 41.0658, + "eval_rougeL_for_data_to_text": 34.3432, + "eval_rougeL_for_dialogue_act_recognition": 36.3571, + "eval_rougeL_for_grammar_error_correction": 42.1947, + "eval_rougeL_for_keyword_tagging": 44.7327, + "eval_rougeL_for_overlap_extraction": 27.0969, + "eval_rougeL_for_question_rewriting": 58.7571, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 26.525, + "eval_rougeL_for_task034_winogrande_question_rewriting": 81.0194, + "eval_rougeL_for_task035_winogrande_question_rewriting": 67.4769, + "eval_rougeL_for_task036_qasc_keyword_tagging": 48.2129, + "eval_rougeL_for_task039_qasc_overlap_extraction": 19.0342, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 45.7217, + "eval_rougeL_for_task1152_bard_word_analogy": 12.0, + "eval_rougeL_for_task1153_bard_word_analogy": 7.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 30.0, + "eval_rougeL_for_task1157_bard_word_analogy": 27.0, + "eval_rougeL_for_task1158_bard_word_analogy": 13.0, + "eval_rougeL_for_task1159_bard_word_analogy": 10.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 16.0663, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 73.4008, + "eval_rougeL_for_task121_zest_question_rewriting": 40.4539, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 0.9222, + "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 35.1081, + "eval_rougeL_for_task1356_xlsum_title_generation": 5.8981, + "eval_rougeL_for_task1358_xlsum_title_generation": 26.0251, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.5, + "eval_rougeL_for_task1407_dart_data_to_text": 5.4727, + "eval_rougeL_for_task1409_dart_data_to_text": 32.7271, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 24.1744, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 49.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 27.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 3.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 19.8069, + "eval_rougeL_for_task1554_scitail_textual_entailment": 47.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 60.2151, + "eval_rougeL_for_task1562_zest_question_rewriting": 38.9418, + "eval_rougeL_for_task1586_scifact_title_generation": 21.4694, + "eval_rougeL_for_task1598_nyc_data_to_text": 27.4212, + "eval_rougeL_for_task1612_sick_textual_entailment": 43.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 68.0684, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 71.4779, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 26.9265, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 62.3413, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 37.9882, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 8.0, + "eval_rougeL_for_task219_rocstories_title_generation": 21.7114, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 39.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 39.3643, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 35.1595, + "eval_rougeL_for_task288_gigaword_title_generation": 22.1142, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 51.469, + "eval_rougeL_for_task329_gap_coreference_resolution": 34.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 42.5333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 66.6667, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 24.8068, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 9.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 64.2592, + "eval_rougeL_for_task418_persent_title_generation": 16.5131, + "eval_rougeL_for_task442_com_qa_question_rewriting": 56.0563, + "eval_rougeL_for_task500_scruples_title_generation": 9.504, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 33.667, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 9.5868, + "eval_rougeL_for_task602_wikitext_title_generation": 5.176, + "eval_rougeL_for_task613_liar_keyword_tagging": 14.2095, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 30.2213, + "eval_rougeL_for_task619_ohsumed_title_generation": 26.8474, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 28.3005, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 83.9404, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 40.8838, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 67.1489, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 54.3945, + "eval_rougeL_for_task677_ollie_data_to_text": 22.443, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 51.0, + "eval_rougeL_for_task743_eurlex_title_generation": 18.312, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.564, + "eval_rougeL_for_task769_qed_title_generation": 66.8483, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 72.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 42.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 52.9714, + "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 20.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 61.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 39.4964, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.375, + "eval_rougeL_for_title_generation": 22.2291, + "eval_rougeL_for_word_analogy": 20.5417, + "eval_runtime": 446.467, + "eval_samples_per_second": 26.676, + "eval_steps_per_second": 0.835, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 2.0326, + "step": 100 + }, + { + "epoch": 0.02, + "eval_exact_match": 24.9622, + "eval_exact_match_for_answerability_classification": 49.5385, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 30.3571, + "eval_exact_match_for_data_to_text": 0.3632, + "eval_exact_match_for_dialogue_act_recognition": 35.1429, + "eval_exact_match_for_grammar_error_correction": 2.0, + "eval_exact_match_for_keyword_tagging": 29.8, + "eval_exact_match_for_overlap_extraction": 7.5, + "eval_exact_match_for_question_rewriting": 1.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 29.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 7.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 15.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 11.0, + "eval_exact_match_for_task1153_bard_word_analogy": 2.0, + "eval_exact_match_for_task1154_bard_word_analogy": 17.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 27.0, + "eval_exact_match_for_task1157_bard_word_analogy": 26.0, + "eval_exact_match_for_task1158_bard_word_analogy": 12.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 5.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 4.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 1.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 16.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 14.0, + "eval_exact_match_for_task219_rocstories_title_generation": 9.0, + "eval_exact_match_for_task220_rocstories_title_generation": 45.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 42.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 18.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 12.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 6.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 1.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 13.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 65.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 6.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 51.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 48.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 42.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 47.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 25.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.9583, + "eval_exact_match_for_title_generation": 6.1659, + "eval_exact_match_for_word_analogy": 19.25, + "eval_f1": 40.0448, + "eval_f1_for_answerability_classification": 52.0513, + "eval_f1_for_cause_effect_classification": 52.6302, + "eval_f1_for_coreference_resolution": 39.1258, + "eval_f1_for_data_to_text": 39.7755, + "eval_f1_for_dialogue_act_recognition": 38.7857, + "eval_f1_for_grammar_error_correction": 44.8074, + "eval_f1_for_keyword_tagging": 44.5629, + "eval_f1_for_overlap_extraction": 25.9971, + "eval_f1_for_question_rewriting": 63.0105, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 29.0, + "eval_f1_for_task034_winogrande_question_rewriting": 80.2305, + "eval_f1_for_task035_winogrande_question_rewriting": 79.8715, + "eval_f1_for_task036_qasc_keyword_tagging": 49.3152, + "eval_f1_for_task039_qasc_overlap_extraction": 20.3333, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 43.4491, + "eval_f1_for_task1152_bard_word_analogy": 11.0, + "eval_f1_for_task1153_bard_word_analogy": 6.6667, + "eval_f1_for_task1154_bard_word_analogy": 17.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 27.0, + "eval_f1_for_task1157_bard_word_analogy": 26.0, + "eval_f1_for_task1158_bard_word_analogy": 12.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 19.3125, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 75.4373, + "eval_f1_for_task121_zest_question_rewriting": 45.6998, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 1.6885, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 36.5585, + "eval_f1_for_task1356_xlsum_title_generation": 7.062, + "eval_f1_for_task1358_xlsum_title_generation": 25.8617, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_f1_for_task1407_dart_data_to_text": 27.1972, + "eval_f1_for_task1409_dart_data_to_text": 39.277, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 21.9276, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 36.0, + "eval_f1_for_task1540_peer_read_title_generation": 19.3761, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 67.6873, + "eval_f1_for_task1562_zest_question_rewriting": 46.2523, + "eval_f1_for_task1586_scifact_title_generation": 23.3463, + "eval_f1_for_task1598_nyc_data_to_text": 37.405, + "eval_f1_for_task1612_sick_textual_entailment": 48.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 69.5757, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 68.7634, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 30.3814, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 64.5754, + "eval_f1_for_task1728_web_nlg_data_to_text": 38.4169, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0308, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 16.5035, + "eval_f1_for_task219_rocstories_title_generation": 19.8464, + "eval_f1_for_task220_rocstories_title_generation": 45.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 49.0, + "eval_f1_for_task233_iirc_answerability_classification": 42.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 27.0833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 31.6609, + "eval_f1_for_task288_gigaword_title_generation": 22.4568, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 55.9971, + "eval_f1_for_task329_gap_coreference_resolution": 35.0, + "eval_f1_for_task330_gap_coreference_resolution": 41.5333, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 27.2267, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 8.5, + "eval_f1_for_task402_grailqa_question_rewriting": 76.4178, + "eval_f1_for_task418_persent_title_generation": 14.3869, + "eval_f1_for_task442_com_qa_question_rewriting": 61.9867, + "eval_f1_for_task500_scruples_title_generation": 9.341, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.1625, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 12.2546, + "eval_f1_for_task602_wikitext_title_generation": 5.0009, + "eval_f1_for_task613_liar_keyword_tagging": 15.25, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.518, + "eval_f1_for_task619_ohsumed_title_generation": 27.3427, + "eval_f1_for_task620_ohsumed_keyword_tagging": 29.3762, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 79.8732, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 13.3857, + "eval_f1_for_task670_ambigqa_question_rewriting": 66.341, + "eval_f1_for_task671_ambigqa_question_rewriting": 54.7447, + "eval_f1_for_task677_ollie_data_to_text": 23.109, + "eval_f1_for_task738_perspectrum_textual_entailment": 51.0, + "eval_f1_for_task743_eurlex_title_generation": 21.2154, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.8589, + "eval_f1_for_task769_qed_title_generation": 63.4764, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 42.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.6857, + "eval_f1_for_task892_gap_coreference_resolution": 47.0, + "eval_f1_for_task893_gap_coreference_resolution": 25.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 50.1847, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.0639, + "eval_f1_for_title_generation": 22.3493, + "eval_f1_for_word_analogy": 19.8333, + "eval_gen_len": 11.3817, + "eval_global_step": 100, + "eval_loss": 1.56145179271698, + "eval_rouge1": 41.6154, + "eval_rouge1_for_answerability_classification": 52.0513, + "eval_rouge1_for_cause_effect_classification": 53.5642, + "eval_rouge1_for_coreference_resolution": 39.6526, + "eval_rouge1_for_data_to_text": 42.7953, + "eval_rouge1_for_dialogue_act_recognition": 41.4333, + "eval_rouge1_for_grammar_error_correction": 49.298, + "eval_rouge1_for_keyword_tagging": 48.1237, + "eval_rouge1_for_overlap_extraction": 28.4299, + "eval_rouge1_for_question_rewriting": 64.7005, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 32.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 80.3287, + "eval_rouge1_for_task035_winogrande_question_rewriting": 81.2558, + "eval_rouge1_for_task036_qasc_keyword_tagging": 51.1463, + "eval_rouge1_for_task039_qasc_overlap_extraction": 24.3333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 52.97, + "eval_rouge1_for_task1152_bard_word_analogy": 11.0, + "eval_rouge1_for_task1153_bard_word_analogy": 6.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 17.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 27.0, + "eval_rouge1_for_task1157_bard_word_analogy": 26.0, + "eval_rouge1_for_task1158_bard_word_analogy": 12.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 21.6786, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 75.7781, + "eval_rouge1_for_task121_zest_question_rewriting": 47.9463, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 1.7207, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 40.4638, + "eval_rouge1_for_task1356_xlsum_title_generation": 8.2891, + "eval_rouge1_for_task1358_xlsum_title_generation": 30.4048, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 11.2, + "eval_rouge1_for_task1407_dart_data_to_text": 27.5754, + "eval_rouge1_for_task1409_dart_data_to_text": 39.9531, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 27.8011, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 36.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 23.0095, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 70.7949, + "eval_rouge1_for_task1562_zest_question_rewriting": 49.0414, + "eval_rouge1_for_task1586_scifact_title_generation": 26.1659, + "eval_rouge1_for_task1598_nyc_data_to_text": 40.0722, + "eval_rouge1_for_task1612_sick_textual_entailment": 48.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 70.3786, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 69.4795, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 31.3326, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 64.5754, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 47.1475, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0299, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 16.4023, + "eval_rouge1_for_task219_rocstories_title_generation": 26.4884, + "eval_rouge1_for_task220_rocstories_title_generation": 45.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 42.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 28.0083, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 32.5265, + "eval_rouge1_for_task288_gigaword_title_generation": 25.3254, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 57.1947, + "eval_rouge1_for_task329_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 41.5333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.0219, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 11.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 78.0764, + "eval_rouge1_for_task418_persent_title_generation": 16.915, + "eval_rouge1_for_task442_com_qa_question_rewriting": 65.2226, + "eval_rouge1_for_task500_scruples_title_generation": 10.8322, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 32.6278, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 14.6457, + "eval_rouge1_for_task602_wikitext_title_generation": 5.6212, + "eval_rouge1_for_task613_liar_keyword_tagging": 24.75, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 30.2609, + "eval_rouge1_for_task619_ohsumed_title_generation": 29.9918, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 34.5635, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 81.1589, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 12.9727, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 67.4817, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 55.7323, + "eval_rouge1_for_task677_ollie_data_to_text": 25.2335, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 51.0, + "eval_rouge1_for_task743_eurlex_title_generation": 21.472, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.0664, + "eval_rouge1_for_task769_qed_title_generation": 63.6128, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 42.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.6857, + "eval_rouge1_for_task892_gap_coreference_resolution": 47.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 25.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.2608, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.893, + "eval_rouge1_for_title_generation": 24.3405, + "eval_rouge1_for_word_analogy": 19.8333, + "eval_rougeL": 40.4673, + "eval_rougeL_for_answerability_classification": 52.0513, + "eval_rougeL_for_cause_effect_classification": 53.0483, + "eval_rougeL_for_coreference_resolution": 39.6526, + "eval_rougeL_for_data_to_text": 36.6727, + "eval_rougeL_for_dialogue_act_recognition": 41.4333, + "eval_rougeL_for_grammar_error_correction": 47.7458, + "eval_rougeL_for_keyword_tagging": 47.5883, + "eval_rougeL_for_overlap_extraction": 28.0256, + "eval_rougeL_for_question_rewriting": 60.7477, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 32.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 80.0289, + "eval_rougeL_for_task035_winogrande_question_rewriting": 78.3457, + "eval_rougeL_for_task036_qasc_keyword_tagging": 48.8025, + "eval_rougeL_for_task039_qasc_overlap_extraction": 24.3333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 45.9103, + "eval_rougeL_for_task1152_bard_word_analogy": 11.0, + "eval_rougeL_for_task1153_bard_word_analogy": 6.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 17.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 27.0, + "eval_rougeL_for_task1157_bard_word_analogy": 26.0, + "eval_rougeL_for_task1158_bard_word_analogy": 12.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 17.7242, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 74.3772, + "eval_rougeL_for_task121_zest_question_rewriting": 42.3279, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 1.6493, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 37.3063, + "eval_rougeL_for_task1356_xlsum_title_generation": 7.4559, + "eval_rougeL_for_task1358_xlsum_title_generation": 26.4538, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 11.2, + "eval_rougeL_for_task1407_dart_data_to_text": 24.1982, + "eval_rougeL_for_task1409_dart_data_to_text": 34.12, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 26.3605, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 28.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 36.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 21.7014, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 69.1311, + "eval_rougeL_for_task1562_zest_question_rewriting": 42.8206, + "eval_rougeL_for_task1586_scifact_title_generation": 22.699, + "eval_rougeL_for_task1598_nyc_data_to_text": 29.287, + "eval_rougeL_for_task1612_sick_textual_entailment": 48.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 68.3431, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 65.0819, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 27.1304, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 64.5754, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 41.1968, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0299, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 16.4023, + "eval_rougeL_for_task219_rocstories_title_generation": 26.0884, + "eval_rougeL_for_task220_rocstories_title_generation": 45.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 42.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 28.0083, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 31.7179, + "eval_rougeL_for_task288_gigaword_title_generation": 21.2385, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 57.1947, + "eval_rougeL_for_task329_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 41.5333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 26.6935, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 11.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.2519, + "eval_rougeL_for_task418_persent_title_generation": 14.5083, + "eval_rougeL_for_task442_com_qa_question_rewriting": 59.8779, + "eval_rougeL_for_task500_scruples_title_generation": 9.8871, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.3079, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 14.4731, + "eval_rougeL_for_task602_wikitext_title_generation": 5.6212, + "eval_rougeL_for_task613_liar_keyword_tagging": 24.75, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 27.9778, + "eval_rougeL_for_task619_ohsumed_title_generation": 26.8649, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 34.2301, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 81.1589, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 12.9727, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 65.6786, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 53.8665, + "eval_rougeL_for_task677_ollie_data_to_text": 21.5503, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 51.0, + "eval_rougeL_for_task743_eurlex_title_generation": 18.239, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0038, + "eval_rougeL_for_task769_qed_title_generation": 63.6128, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 57.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 42.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.6857, + "eval_rougeL_for_task892_gap_coreference_resolution": 47.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 25.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.7907, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.893, + "eval_rougeL_for_title_generation": 22.5199, + "eval_rougeL_for_word_analogy": 19.8333, + "eval_runtime": 460.7253, + "eval_samples_per_second": 25.851, + "eval_steps_per_second": 0.81, + "step": 100 + }, + { + "epoch": 0.05, + "learning_rate": 5e-05, + "loss": 1.9583, + "step": 200 + }, + { + "epoch": 0.05, + "eval_exact_match": 25.6507, + "eval_exact_match_for_answerability_classification": 50.8462, + "eval_exact_match_for_cause_effect_classification": 36.5714, + "eval_exact_match_for_coreference_resolution": 31.2143, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 35.2857, + "eval_exact_match_for_grammar_error_correction": 4.0, + "eval_exact_match_for_keyword_tagging": 34.4, + "eval_exact_match_for_overlap_extraction": 9.0, + "eval_exact_match_for_question_rewriting": 1.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 31.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 19.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 9.0, + "eval_exact_match_for_task1153_bard_word_analogy": 7.0, + "eval_exact_match_for_task1154_bard_word_analogy": 19.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 27.0, + "eval_exact_match_for_task1157_bard_word_analogy": 29.0, + "eval_exact_match_for_task1158_bard_word_analogy": 15.0, + "eval_exact_match_for_task1159_bard_word_analogy": 11.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 6.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 46.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 64.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 8.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 41.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 2.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 17.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 9.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 27.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 15.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 52.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 10.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 73.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 59.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 38.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 23.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.4583, + "eval_exact_match_for_title_generation": 6.9507, + "eval_exact_match_for_word_analogy": 20.875, + "eval_f1": 40.8628, + "eval_f1_for_answerability_classification": 53.3621, + "eval_f1_for_cause_effect_classification": 53.5936, + "eval_f1_for_coreference_resolution": 39.7823, + "eval_f1_for_data_to_text": 39.4233, + "eval_f1_for_dialogue_act_recognition": 38.9286, + "eval_f1_for_grammar_error_correction": 48.7341, + "eval_f1_for_keyword_tagging": 47.5014, + "eval_f1_for_overlap_extraction": 28.3335, + "eval_f1_for_question_rewriting": 64.8857, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 31.0, + "eval_f1_for_task034_winogrande_question_rewriting": 77.4469, + "eval_f1_for_task035_winogrande_question_rewriting": 79.0045, + "eval_f1_for_task036_qasc_keyword_tagging": 57.2285, + "eval_f1_for_task039_qasc_overlap_extraction": 23.8333, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 39.8475, + "eval_f1_for_task1152_bard_word_analogy": 9.0, + "eval_f1_for_task1153_bard_word_analogy": 9.0, + "eval_f1_for_task1154_bard_word_analogy": 19.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 27.0, + "eval_f1_for_task1157_bard_word_analogy": 29.0, + "eval_f1_for_task1158_bard_word_analogy": 15.0, + "eval_f1_for_task1159_bard_word_analogy": 11.0, + "eval_f1_for_task1161_coda_19_title_generation": 19.6941, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 77.9725, + "eval_f1_for_task121_zest_question_rewriting": 47.6464, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 2.0158, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 37.6235, + "eval_f1_for_task1356_xlsum_title_generation": 7.6738, + "eval_f1_for_task1358_xlsum_title_generation": 26.3624, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 35.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 53.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_f1_for_task1407_dart_data_to_text": 25.6721, + "eval_f1_for_task1409_dart_data_to_text": 39.2989, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 22.7339, + "eval_f1_for_task1439_doqa_answerability_classification": 46.0, + "eval_f1_for_task1442_doqa_answerability_classification": 64.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 22.7298, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 74.7343, + "eval_f1_for_task1562_zest_question_rewriting": 48.2222, + "eval_f1_for_task1586_scifact_title_generation": 24.0668, + "eval_f1_for_task1598_nyc_data_to_text": 38.6834, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 41.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 72.911, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 68.968, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_f1_for_task1659_billsum_title_generation": 28.0846, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.0476, + "eval_f1_for_task1728_web_nlg_data_to_text": 37.4343, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 20.3259, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0408, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 36.6833, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.8336, + "eval_f1_for_task288_gigaword_title_generation": 23.8239, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 54.2776, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 43.6, + "eval_f1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 27.7068, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 13.2917, + "eval_f1_for_task402_grailqa_question_rewriting": 78.4931, + "eval_f1_for_task418_persent_title_generation": 13.4286, + "eval_f1_for_task442_com_qa_question_rewriting": 63.5042, + "eval_f1_for_task500_scruples_title_generation": 10.8322, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 30.4178, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 14.7055, + "eval_f1_for_task602_wikitext_title_generation": 5.0218, + "eval_f1_for_task613_liar_keyword_tagging": 16.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.7815, + "eval_f1_for_task619_ohsumed_title_generation": 28.6468, + "eval_f1_for_task620_ohsumed_keyword_tagging": 32.0381, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 83.2405, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 14.3667, + "eval_f1_for_task670_ambigqa_question_rewriting": 71.7263, + "eval_f1_for_task671_ambigqa_question_rewriting": 59.1926, + "eval_f1_for_task677_ollie_data_to_text": 23.3305, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 21.0861, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.2327, + "eval_f1_for_task769_qed_title_generation": 73.3129, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 53.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 23.0, + "eval_f1_for_task890_gwsd_textual_entailment": 38.0, + "eval_f1_for_task891_gap_coreference_resolution": 51.6857, + "eval_f1_for_task892_gap_coreference_resolution": 42.0, + "eval_f1_for_task893_gap_coreference_resolution": 23.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 51.5608, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.4583, + "eval_f1_for_title_generation": 23.6225, + "eval_f1_for_word_analogy": 21.125, + "eval_gen_len": 11.3839, + "eval_global_step": 200, + "eval_loss": 1.4671828746795654, + "eval_rouge1": 42.3354, + "eval_rouge1_for_answerability_classification": 53.3621, + "eval_rouge1_for_cause_effect_classification": 54.5534, + "eval_rouge1_for_coreference_resolution": 40.1752, + "eval_rouge1_for_data_to_text": 42.5909, + "eval_rouge1_for_dialogue_act_recognition": 40.7762, + "eval_rouge1_for_grammar_error_correction": 52.3639, + "eval_rouge1_for_keyword_tagging": 51.7284, + "eval_rouge1_for_overlap_extraction": 31.198, + "eval_rouge1_for_question_rewriting": 66.4937, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 32.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 77.6592, + "eval_rouge1_for_task035_winogrande_question_rewriting": 80.4777, + "eval_rouge1_for_task036_qasc_keyword_tagging": 61.5799, + "eval_rouge1_for_task039_qasc_overlap_extraction": 28.8333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 48.621, + "eval_rouge1_for_task1152_bard_word_analogy": 9.0, + "eval_rouge1_for_task1153_bard_word_analogy": 9.0, + "eval_rouge1_for_task1154_bard_word_analogy": 19.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 27.0, + "eval_rouge1_for_task1157_bard_word_analogy": 29.0, + "eval_rouge1_for_task1158_bard_word_analogy": 15.0, + "eval_rouge1_for_task1159_bard_word_analogy": 11.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 22.7816, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 78.268, + "eval_rouge1_for_task121_zest_question_rewriting": 49.9049, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 2.0551, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 41.4023, + "eval_rouge1_for_task1356_xlsum_title_generation": 8.9383, + "eval_rouge1_for_task1358_xlsum_title_generation": 31.4148, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 5.6, + "eval_rouge1_for_task1407_dart_data_to_text": 26.1875, + "eval_rouge1_for_task1409_dart_data_to_text": 39.5564, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 27.0131, + "eval_rouge1_for_task1439_doqa_answerability_classification": 46.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 64.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 24.559, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 77.7147, + "eval_rouge1_for_task1562_zest_question_rewriting": 50.3717, + "eval_rouge1_for_task1586_scifact_title_generation": 27.2881, + "eval_rouge1_for_task1598_nyc_data_to_text": 41.7427, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 80.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 73.3465, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 69.6706, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1659_billsum_title_generation": 28.8118, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.0476, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 48.4413, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 25.1822, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0408, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 37.6833, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 33.5627, + "eval_rouge1_for_task288_gigaword_title_generation": 26.5636, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 56.8919, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 43.6, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 28.6492, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 14.625, + "eval_rouge1_for_task402_grailqa_question_rewriting": 80.3532, + "eval_rouge1_for_task418_persent_title_generation": 16.4149, + "eval_rouge1_for_task442_com_qa_question_rewriting": 66.8999, + "eval_rouge1_for_task500_scruples_title_generation": 12.3964, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.9823, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 16.7521, + "eval_rouge1_for_task602_wikitext_title_generation": 5.7, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 30.5583, + "eval_rouge1_for_task619_ohsumed_title_generation": 31.0831, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 34.9881, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 84.7405, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 13.9667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 72.7913, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 59.9566, + "eval_rouge1_for_task677_ollie_data_to_text": 25.0169, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 21.3354, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.4319, + "eval_rouge1_for_task769_qed_title_generation": 72.9956, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 38.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 51.6381, + "eval_rouge1_for_task892_gap_coreference_resolution": 42.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 23.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.6718, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.0972, + "eval_rouge1_for_title_generation": 25.4676, + "eval_rouge1_for_word_analogy": 21.125, + "eval_rougeL": 41.2193, + "eval_rougeL_for_answerability_classification": 53.3621, + "eval_rougeL_for_cause_effect_classification": 54.0929, + "eval_rougeL_for_coreference_resolution": 40.1752, + "eval_rougeL_for_data_to_text": 36.356, + "eval_rougeL_for_dialogue_act_recognition": 40.7762, + "eval_rougeL_for_grammar_error_correction": 51.5382, + "eval_rougeL_for_keyword_tagging": 51.322, + "eval_rougeL_for_overlap_extraction": 30.7588, + "eval_rougeL_for_question_rewriting": 62.6032, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 32.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 76.9354, + "eval_rougeL_for_task035_winogrande_question_rewriting": 77.1706, + "eval_rougeL_for_task036_qasc_keyword_tagging": 59.8814, + "eval_rougeL_for_task039_qasc_overlap_extraction": 28.8333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 41.2387, + "eval_rougeL_for_task1152_bard_word_analogy": 9.0, + "eval_rougeL_for_task1153_bard_word_analogy": 9.0, + "eval_rougeL_for_task1154_bard_word_analogy": 19.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 27.0, + "eval_rougeL_for_task1157_bard_word_analogy": 29.0, + "eval_rougeL_for_task1158_bard_word_analogy": 15.0, + "eval_rougeL_for_task1159_bard_word_analogy": 11.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 19.7237, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 77.1113, + "eval_rougeL_for_task121_zest_question_rewriting": 45.2387, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 1.9837, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 37.9876, + "eval_rougeL_for_task1356_xlsum_title_generation": 7.8124, + "eval_rougeL_for_task1358_xlsum_title_generation": 27.5868, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 49.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 5.6, + "eval_rougeL_for_task1407_dart_data_to_text": 22.8983, + "eval_rougeL_for_task1409_dart_data_to_text": 33.8776, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 26.3247, + "eval_rougeL_for_task1439_doqa_answerability_classification": 46.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 64.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 23.4388, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 76.7518, + "eval_rougeL_for_task1562_zest_question_rewriting": 43.7427, + "eval_rougeL_for_task1586_scifact_title_generation": 23.2789, + "eval_rougeL_for_task1598_nyc_data_to_text": 30.4091, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 80.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 71.462, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 66.0652, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1659_billsum_title_generation": 24.6038, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 62.0476, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 42.1301, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 24.96, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0408, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 37.6833, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.6844, + "eval_rougeL_for_task288_gigaword_title_generation": 22.5127, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 56.8919, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 43.6, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 52.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 27.2734, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 14.625, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.5752, + "eval_rougeL_for_task418_persent_title_generation": 14.5101, + "eval_rougeL_for_task442_com_qa_question_rewriting": 62.8731, + "eval_rougeL_for_task500_scruples_title_generation": 11.7774, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 30.5003, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 16.7521, + "eval_rougeL_for_task602_wikitext_title_generation": 5.6749, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 28.7099, + "eval_rougeL_for_task619_ohsumed_title_generation": 27.8279, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 34.6548, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 84.7405, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 13.9667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 70.9256, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 58.6127, + "eval_rougeL_for_task677_ollie_data_to_text": 21.2987, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 18.4755, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.4319, + "eval_rougeL_for_task769_qed_title_generation": 72.9956, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 34.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 38.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 51.6381, + "eval_rougeL_for_task892_gap_coreference_resolution": 42.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 23.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.4902, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.0972, + "eval_rougeL_for_title_generation": 23.7392, + "eval_rougeL_for_word_analogy": 21.125, + "eval_runtime": 461.4432, + "eval_samples_per_second": 25.81, + "eval_steps_per_second": 0.808, + "step": 200 + }, + { + "epoch": 0.11, + "learning_rate": 5e-05, + "loss": 1.863, + "step": 500 + }, + { + "epoch": 0.11, + "eval_exact_match": 25.1973, + "eval_exact_match_for_answerability_classification": 50.5385, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 31.4286, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 36.0, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 36.6, + "eval_exact_match_for_overlap_extraction": 7.0, + "eval_exact_match_for_question_rewriting": 1.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 39.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 21.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 14.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 10.0, + "eval_exact_match_for_task1153_bard_word_analogy": 10.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 52.0, + "eval_exact_match_for_task1156_bard_word_analogy": 25.0, + "eval_exact_match_for_task1157_bard_word_analogy": 19.0, + "eval_exact_match_for_task1158_bard_word_analogy": 10.0, + "eval_exact_match_for_task1159_bard_word_analogy": 8.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 55.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 43.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 1.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 34.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 1.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 16.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 13.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 45.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 46.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 47.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 18.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 55.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 14.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 9.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 58.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 1.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 41.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 6.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 63.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 26.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 37.625, + "eval_exact_match_for_title_generation": 6.7825, + "eval_exact_match_for_word_analogy": 18.75, + "eval_f1": 40.771, + "eval_f1_for_answerability_classification": 53.1044, + "eval_f1_for_cause_effect_classification": 54.1011, + "eval_f1_for_coreference_resolution": 39.817, + "eval_f1_for_data_to_text": 39.7348, + "eval_f1_for_dialogue_act_recognition": 39.5, + "eval_f1_for_grammar_error_correction": 51.4858, + "eval_f1_for_keyword_tagging": 50.0189, + "eval_f1_for_overlap_extraction": 25.4882, + "eval_f1_for_question_rewriting": 67.2704, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 84.1315, + "eval_f1_for_task035_winogrande_question_rewriting": 82.9807, + "eval_f1_for_task036_qasc_keyword_tagging": 60.0501, + "eval_f1_for_task039_qasc_overlap_extraction": 19.0833, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 43.7126, + "eval_f1_for_task1152_bard_word_analogy": 10.0, + "eval_f1_for_task1153_bard_word_analogy": 10.6667, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 52.0, + "eval_f1_for_task1156_bard_word_analogy": 25.0, + "eval_f1_for_task1157_bard_word_analogy": 19.0, + "eval_f1_for_task1158_bard_word_analogy": 10.0, + "eval_f1_for_task1159_bard_word_analogy": 8.0, + "eval_f1_for_task1161_coda_19_title_generation": 21.9848, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 77.2353, + "eval_f1_for_task121_zest_question_rewriting": 49.5806, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 5.8391, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 36.4421, + "eval_f1_for_task1356_xlsum_title_generation": 8.1415, + "eval_f1_for_task1358_xlsum_title_generation": 29.1242, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 55.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 43.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_f1_for_task1407_dart_data_to_text": 26.9605, + "eval_f1_for_task1409_dart_data_to_text": 43.2484, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 21.5061, + "eval_f1_for_task1439_doqa_answerability_classification": 55.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 25.3346, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 81.4655, + "eval_f1_for_task1562_zest_question_rewriting": 56.9801, + "eval_f1_for_task1586_scifact_title_generation": 24.3956, + "eval_f1_for_task1598_nyc_data_to_text": 38.2578, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 34.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 72.3027, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 63.7663, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_f1_for_task1659_billsum_title_generation": 26.4583, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.8167, + "eval_f1_for_task1728_web_nlg_data_to_text": 40.371, + "eval_f1_for_task190_snli_textual_entailment": 13.0, + "eval_f1_for_task199_multinli_textual_entailment": 45.0, + "eval_f1_for_task200_multinli_textual_entailment": 33.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 13.972, + "eval_f1_for_task220_rocstories_title_generation": 46.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0235, + "eval_f1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 25.8238, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 31.8931, + "eval_f1_for_task288_gigaword_title_generation": 24.4431, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 61.7, + "eval_f1_for_task329_gap_coreference_resolution": 32.0, + "eval_f1_for_task330_gap_coreference_resolution": 43.7857, + "eval_f1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.2475, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 18.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 77.296, + "eval_f1_for_task418_persent_title_generation": 15.6602, + "eval_f1_for_task442_com_qa_question_rewriting": 64.076, + "eval_f1_for_task500_scruples_title_generation": 9.9238, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 30.8658, + "eval_f1_for_task520_aquamuse_answerability_classification": 58.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 13.8312, + "eval_f1_for_task602_wikitext_title_generation": 5.5751, + "eval_f1_for_task613_liar_keyword_tagging": 15.5, + "eval_f1_for_task614_glucose_cause_effect_classification": 30.7939, + "eval_f1_for_task619_ohsumed_title_generation": 30.2951, + "eval_f1_for_task620_ohsumed_keyword_tagging": 32.2381, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 41.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.3065, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 8.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 75.3617, + "eval_f1_for_task671_ambigqa_question_rewriting": 63.5879, + "eval_f1_for_task677_ollie_data_to_text": 18.7307, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 22.1491, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.0225, + "eval_f1_for_task769_qed_title_generation": 73.8284, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 48.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task890_gwsd_textual_entailment": 35.0, + "eval_f1_for_task891_gap_coreference_resolution": 54.1444, + "eval_f1_for_task892_gap_coreference_resolution": 36.0, + "eval_f1_for_task893_gap_coreference_resolution": 26.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 52.1162, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 37.625, + "eval_f1_for_title_generation": 23.931, + "eval_f1_for_word_analogy": 18.8333, + "eval_gen_len": 10.7761, + "eval_global_step": 500, + "eval_loss": 1.4241459369659424, + "eval_rouge1": 42.2968, + "eval_rouge1_for_answerability_classification": 53.1044, + "eval_rouge1_for_cause_effect_classification": 54.948, + "eval_rouge1_for_coreference_resolution": 40.1852, + "eval_rouge1_for_data_to_text": 43.1293, + "eval_rouge1_for_dialogue_act_recognition": 41.7857, + "eval_rouge1_for_grammar_error_correction": 56.2537, + "eval_rouge1_for_keyword_tagging": 53.6589, + "eval_rouge1_for_overlap_extraction": 27.3813, + "eval_rouge1_for_question_rewriting": 68.9778, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 84.4273, + "eval_rouge1_for_task035_winogrande_question_rewriting": 83.897, + "eval_rouge1_for_task036_qasc_keyword_tagging": 62.2478, + "eval_rouge1_for_task039_qasc_overlap_extraction": 22.0833, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 54.5382, + "eval_rouge1_for_task1152_bard_word_analogy": 10.0, + "eval_rouge1_for_task1153_bard_word_analogy": 10.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 52.0, + "eval_rouge1_for_task1156_bard_word_analogy": 25.0, + "eval_rouge1_for_task1157_bard_word_analogy": 19.0, + "eval_rouge1_for_task1158_bard_word_analogy": 10.0, + "eval_rouge1_for_task1159_bard_word_analogy": 8.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 25.3462, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 77.5884, + "eval_rouge1_for_task121_zest_question_rewriting": 51.5406, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 5.8246, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 39.3883, + "eval_rouge1_for_task1356_xlsum_title_generation": 9.5537, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.1287, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 55.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 43.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_rouge1_for_task1407_dart_data_to_text": 27.4095, + "eval_rouge1_for_task1409_dart_data_to_text": 43.2722, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 27.7873, + "eval_rouge1_for_task1439_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 27.1152, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 84.7202, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.5865, + "eval_rouge1_for_task1586_scifact_title_generation": 27.2757, + "eval_rouge1_for_task1598_nyc_data_to_text": 40.9906, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 73.3167, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 64.1623, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_rouge1_for_task1659_billsum_title_generation": 28.0377, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.8167, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 50.8611, + "eval_rouge1_for_task190_snli_textual_entailment": 13.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 45.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 17.706, + "eval_rouge1_for_task220_rocstories_title_generation": 46.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0235, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 25.9905, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 32.6792, + "eval_rouge1_for_task288_gigaword_title_generation": 27.2505, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 61.9, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 44.119, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.8419, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 20.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 79.4971, + "eval_rouge1_for_task418_persent_title_generation": 18.0744, + "eval_rouge1_for_task442_com_qa_question_rewriting": 68.2758, + "eval_rouge1_for_task500_scruples_title_generation": 11.7533, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.8308, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 58.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 16.2591, + "eval_rouge1_for_task602_wikitext_title_generation": 5.883, + "eval_rouge1_for_task613_liar_keyword_tagging": 27.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 36.1275, + "eval_rouge1_for_task619_ohsumed_title_generation": 33.3349, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 36.1214, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 41.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 92.5922, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 8.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 76.6789, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 64.5595, + "eval_rouge1_for_task677_ollie_data_to_text": 21.3245, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 23.3617, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.1814, + "eval_rouge1_for_task769_qed_title_generation": 73.4365, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 54.2667, + "eval_rouge1_for_task892_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 26.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 52.6023, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 39.4583, + "eval_rouge1_for_title_generation": 25.7416, + "eval_rouge1_for_word_analogy": 18.8333, + "eval_rougeL": 41.1522, + "eval_rougeL_for_answerability_classification": 53.1044, + "eval_rougeL_for_cause_effect_classification": 54.3493, + "eval_rougeL_for_coreference_resolution": 40.1852, + "eval_rougeL_for_data_to_text": 36.7055, + "eval_rougeL_for_dialogue_act_recognition": 41.7857, + "eval_rougeL_for_grammar_error_correction": 55.336, + "eval_rougeL_for_keyword_tagging": 53.0806, + "eval_rougeL_for_overlap_extraction": 27.0488, + "eval_rougeL_for_question_rewriting": 65.3047, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 84.3129, + "eval_rougeL_for_task035_winogrande_question_rewriting": 82.7877, + "eval_rougeL_for_task036_qasc_keyword_tagging": 60.0896, + "eval_rougeL_for_task039_qasc_overlap_extraction": 22.0833, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 46.3874, + "eval_rougeL_for_task1152_bard_word_analogy": 10.0, + "eval_rougeL_for_task1153_bard_word_analogy": 10.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 52.0, + "eval_rougeL_for_task1156_bard_word_analogy": 25.0, + "eval_rougeL_for_task1157_bard_word_analogy": 19.0, + "eval_rougeL_for_task1158_bard_word_analogy": 10.0, + "eval_rougeL_for_task1159_bard_word_analogy": 8.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 21.3851, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 76.4072, + "eval_rougeL_for_task121_zest_question_rewriting": 46.6896, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 5.7055, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 36.1454, + "eval_rougeL_for_task1356_xlsum_title_generation": 7.857, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.6606, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 55.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 43.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_rougeL_for_task1407_dart_data_to_text": 23.0959, + "eval_rougeL_for_task1409_dart_data_to_text": 37.1966, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 26.6889, + "eval_rougeL_for_task1439_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 26.0216, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 83.9831, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.6921, + "eval_rougeL_for_task1586_scifact_title_generation": 23.021, + "eval_rougeL_for_task1598_nyc_data_to_text": 30.1573, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 71.8401, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 60.2923, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_rougeL_for_task1659_billsum_title_generation": 23.8642, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 62.8167, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 44.3546, + "eval_rougeL_for_task190_snli_textual_entailment": 13.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 45.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 17.506, + "eval_rougeL_for_task220_rocstories_title_generation": 46.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 42.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0235, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 25.9905, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.0142, + "eval_rougeL_for_task288_gigaword_title_generation": 23.0595, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 61.9, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 44.119, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 55.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.1973, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 20.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.1326, + "eval_rougeL_for_task418_persent_title_generation": 16.1468, + "eval_rougeL_for_task442_com_qa_question_rewriting": 63.5898, + "eval_rougeL_for_task500_scruples_title_generation": 11.1337, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 30.5002, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 58.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 16.097, + "eval_rougeL_for_task602_wikitext_title_generation": 5.8576, + "eval_rougeL_for_task613_liar_keyword_tagging": 27.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 33.5813, + "eval_rougeL_for_task619_ohsumed_title_generation": 30.3708, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 35.3881, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 41.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 92.5922, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 8.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 75.1979, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 63.5561, + "eval_rougeL_for_task677_ollie_data_to_text": 18.0288, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 20.2639, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.0093, + "eval_rougeL_for_task769_qed_title_generation": 73.4365, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 54.2667, + "eval_rougeL_for_task892_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 26.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.6321, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 39.4583, + "eval_rougeL_for_title_generation": 23.8761, + "eval_rougeL_for_word_analogy": 18.8333, + "eval_runtime": 455.0667, + "eval_samples_per_second": 26.172, + "eval_steps_per_second": 0.82, + "step": 500 + }, + { + "epoch": 0.23, + "learning_rate": 5e-05, + "loss": 1.7111, + "step": 1000 + }, + { + "epoch": 0.23, + "eval_exact_match": 24.8615, + "eval_exact_match_for_answerability_classification": 48.7692, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 31.4286, + "eval_exact_match_for_data_to_text": 0.3632, + "eval_exact_match_for_dialogue_act_recognition": 31.0, + "eval_exact_match_for_grammar_error_correction": 8.0, + "eval_exact_match_for_keyword_tagging": 40.8, + "eval_exact_match_for_overlap_extraction": 9.0, + "eval_exact_match_for_question_rewriting": 1.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 39.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 32.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 48.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 10.0, + "eval_exact_match_for_task1153_bard_word_analogy": 9.0, + "eval_exact_match_for_task1154_bard_word_analogy": 18.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 22.0, + "eval_exact_match_for_task1158_bard_word_analogy": 8.0, + "eval_exact_match_for_task1159_bard_word_analogy": 9.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 46.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 35.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 16.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 1.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 15.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 0.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 49.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 28.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 37.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 49.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 23.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 51.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 21.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 51.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 20.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 49.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 18.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 3.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 17.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 21.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 49.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 4.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 61.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 54.0, + "eval_exact_match_for_textual_entailment": 37.125, + "eval_exact_match_for_title_generation": 7.1749, + "eval_exact_match_for_word_analogy": 18.625, + "eval_f1": 40.872, + "eval_f1_for_answerability_classification": 51.2949, + "eval_f1_for_cause_effect_classification": 53.6995, + "eval_f1_for_coreference_resolution": 40.6384, + "eval_f1_for_data_to_text": 40.5127, + "eval_f1_for_dialogue_act_recognition": 34.7143, + "eval_f1_for_grammar_error_correction": 58.0435, + "eval_f1_for_keyword_tagging": 52.2065, + "eval_f1_for_overlap_extraction": 26.8089, + "eval_f1_for_question_rewriting": 68.3382, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 39.0, + "eval_f1_for_task034_winogrande_question_rewriting": 88.1103, + "eval_f1_for_task035_winogrande_question_rewriting": 85.0083, + "eval_f1_for_task036_qasc_keyword_tagging": 66.8518, + "eval_f1_for_task039_qasc_overlap_extraction": 23.1667, + "eval_f1_for_task050_multirc_answerability_classification": 48.0, + "eval_f1_for_task102_commongen_data_to_text": 45.5506, + "eval_f1_for_task1152_bard_word_analogy": 10.0, + "eval_f1_for_task1153_bard_word_analogy": 9.0, + "eval_f1_for_task1154_bard_word_analogy": 18.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 22.0, + "eval_f1_for_task1158_bard_word_analogy": 8.0, + "eval_f1_for_task1159_bard_word_analogy": 9.0, + "eval_f1_for_task1161_coda_19_title_generation": 25.4847, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.9616, + "eval_f1_for_task121_zest_question_rewriting": 49.4289, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.8551, + "eval_f1_for_task1344_rte_textual_entailment": 46.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.7501, + "eval_f1_for_task1356_xlsum_title_generation": 8.6644, + "eval_f1_for_task1358_xlsum_title_generation": 28.9576, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 35.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 28.613, + "eval_f1_for_task1409_dart_data_to_text": 41.6464, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 32.6695, + "eval_f1_for_task1439_doqa_answerability_classification": 54.0, + "eval_f1_for_task1442_doqa_answerability_classification": 55.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 45.0, + "eval_f1_for_task1540_peer_read_title_generation": 24.6597, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4176, + "eval_f1_for_task1562_zest_question_rewriting": 56.2358, + "eval_f1_for_task1586_scifact_title_generation": 24.3027, + "eval_f1_for_task1598_nyc_data_to_text": 35.0253, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 76.999, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 66.7403, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 28.7621, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 59.5262, + "eval_f1_for_task1728_web_nlg_data_to_text": 43.0942, + "eval_f1_for_task190_snli_textual_entailment": 0.0, + "eval_f1_for_task199_multinli_textual_entailment": 49.0, + "eval_f1_for_task200_multinli_textual_entailment": 28.0, + "eval_f1_for_task201_multinli_textual_entailment": 37.0, + "eval_f1_for_task202_multinli_textual_entailment": 34.0, + "eval_f1_for_task219_rocstories_title_generation": 15.6675, + "eval_f1_for_task220_rocstories_title_generation": 49.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 23.1667, + "eval_f1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 33.8476, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 30.4512, + "eval_f1_for_task288_gigaword_title_generation": 25.0843, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 61.4667, + "eval_f1_for_task329_gap_coreference_resolution": 27.0, + "eval_f1_for_task330_gap_coreference_resolution": 45.819, + "eval_f1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.5052, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 23.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 69.0671, + "eval_f1_for_task418_persent_title_generation": 17.5234, + "eval_f1_for_task442_com_qa_question_rewriting": 65.4507, + "eval_f1_for_task500_scruples_title_generation": 13.8254, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 29.7995, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 20.9161, + "eval_f1_for_task602_wikitext_title_generation": 6.4132, + "eval_f1_for_task613_liar_keyword_tagging": 17.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.7246, + "eval_f1_for_task619_ohsumed_title_generation": 31.2141, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.7333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 38.0, + "eval_f1_for_task642_e_snli_textual_entailment": 49.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 92.781, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 13.6, + "eval_f1_for_task670_ambigqa_question_rewriting": 77.6425, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.0664, + "eval_f1_for_task677_ollie_data_to_text": 20.0675, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 23.8529, + "eval_f1_for_task760_msr_sqa_data_to_text": 5.3233, + "eval_f1_for_task769_qed_title_generation": 74.1405, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 48.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 52.0111, + "eval_f1_for_task892_gap_coreference_resolution": 30.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 52.5139, + "eval_f1_for_task970_sherliic_textual_entailment": 54.0, + "eval_f1_for_textual_entailment": 37.125, + "eval_f1_for_title_generation": 25.566, + "eval_f1_for_word_analogy": 18.625, + "eval_gen_len": 10.9901, + "eval_global_step": 1000, + "eval_loss": 1.4187277555465698, + "eval_rouge1": 42.2853, + "eval_rouge1_for_answerability_classification": 51.2939, + "eval_rouge1_for_cause_effect_classification": 54.2775, + "eval_rouge1_for_coreference_resolution": 41.2995, + "eval_rouge1_for_data_to_text": 43.968, + "eval_rouge1_for_dialogue_act_recognition": 34.8857, + "eval_rouge1_for_grammar_error_correction": 62.6261, + "eval_rouge1_for_keyword_tagging": 56.0153, + "eval_rouge1_for_overlap_extraction": 27.5784, + "eval_rouge1_for_question_rewriting": 69.9926, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 88.3877, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.8266, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.3288, + "eval_rouge1_for_task039_qasc_overlap_extraction": 24.1667, + "eval_rouge1_for_task050_multirc_answerability_classification": 48.0, + "eval_rouge1_for_task102_commongen_data_to_text": 57.4787, + "eval_rouge1_for_task1152_bard_word_analogy": 10.0, + "eval_rouge1_for_task1153_bard_word_analogy": 9.0, + "eval_rouge1_for_task1154_bard_word_analogy": 18.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 22.0, + "eval_rouge1_for_task1158_bard_word_analogy": 8.0, + "eval_rouge1_for_task1159_bard_word_analogy": 9.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 28.6429, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2958, + "eval_rouge1_for_task121_zest_question_rewriting": 51.8183, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.9931, + "eval_rouge1_for_task1344_rte_textual_entailment": 46.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.2332, + "eval_rouge1_for_task1356_xlsum_title_generation": 10.8845, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.7321, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 35.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 4.2, + "eval_rouge1_for_task1407_dart_data_to_text": 29.3078, + "eval_rouge1_for_task1409_dart_data_to_text": 41.7418, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.6405, + "eval_rouge1_for_task1439_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 45.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 26.6432, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6116, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.931, + "eval_rouge1_for_task1586_scifact_title_generation": 27.3778, + "eval_rouge1_for_task1598_nyc_data_to_text": 37.2792, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.8114, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 67.2205, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 30.8386, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 59.5262, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 54.0669, + "eval_rouge1_for_task190_snli_textual_entailment": 0.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 49.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 28.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 37.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.3531, + "eval_rouge1_for_task220_rocstories_title_generation": 49.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 23.1538, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 34.5143, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 30.9902, + "eval_rouge1_for_task288_gigaword_title_generation": 27.7772, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 62.8667, + "eval_rouge1_for_task329_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 45.819, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.0095, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 28.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 71.0542, + "eval_rouge1_for_task418_persent_title_generation": 19.7633, + "eval_rouge1_for_task442_com_qa_question_rewriting": 69.4166, + "eval_rouge1_for_task500_scruples_title_generation": 15.2243, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 29.9201, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 23.3688, + "eval_rouge1_for_task602_wikitext_title_generation": 6.7386, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 31.2664, + "eval_rouge1_for_task619_ohsumed_title_generation": 33.5968, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 37.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 49.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.281, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 13.2667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 78.3635, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.7802, + "eval_rouge1_for_task677_ollie_data_to_text": 21.7527, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 25.106, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.5114, + "eval_rouge1_for_task769_qed_title_generation": 73.8952, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 52.2, + "eval_rouge1_for_task892_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 52.8948, + "eval_rouge1_for_task970_sherliic_textual_entailment": 54.0, + "eval_rouge1_for_textual_entailment": 38.9861, + "eval_rouge1_for_title_generation": 27.3978, + "eval_rouge1_for_word_analogy": 18.625, + "eval_rougeL": 41.1167, + "eval_rougeL_for_answerability_classification": 51.2939, + "eval_rougeL_for_cause_effect_classification": 53.6878, + "eval_rougeL_for_coreference_resolution": 41.2995, + "eval_rougeL_for_data_to_text": 37.454, + "eval_rougeL_for_dialogue_act_recognition": 34.8857, + "eval_rougeL_for_grammar_error_correction": 61.5913, + "eval_rougeL_for_keyword_tagging": 55.5545, + "eval_rougeL_for_overlap_extraction": 26.9665, + "eval_rougeL_for_question_rewriting": 66.5824, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 88.335, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.1663, + "eval_rougeL_for_task036_qasc_keyword_tagging": 67.3583, + "eval_rougeL_for_task039_qasc_overlap_extraction": 24.1667, + "eval_rougeL_for_task050_multirc_answerability_classification": 48.0, + "eval_rougeL_for_task102_commongen_data_to_text": 49.5853, + "eval_rougeL_for_task1152_bard_word_analogy": 10.0, + "eval_rougeL_for_task1153_bard_word_analogy": 9.0, + "eval_rougeL_for_task1154_bard_word_analogy": 18.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 22.0, + "eval_rougeL_for_task1158_bard_word_analogy": 8.0, + "eval_rougeL_for_task1159_bard_word_analogy": 9.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 23.8088, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1385, + "eval_rougeL_for_task121_zest_question_rewriting": 47.3129, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.8341, + "eval_rougeL_for_task1344_rte_textual_entailment": 46.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.1393, + "eval_rougeL_for_task1356_xlsum_title_generation": 9.1541, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.7081, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 35.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 48.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 4.2, + "eval_rougeL_for_task1407_dart_data_to_text": 25.0293, + "eval_rougeL_for_task1409_dart_data_to_text": 35.8267, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.4502, + "eval_rougeL_for_task1439_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 45.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 24.4044, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7325, + "eval_rougeL_for_task1562_zest_question_rewriting": 50.9055, + "eval_rougeL_for_task1586_scifact_title_generation": 22.6493, + "eval_rougeL_for_task1598_nyc_data_to_text": 28.7738, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.2646, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 60.0564, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 25.9409, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 59.5262, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.2245, + "eval_rougeL_for_task190_snli_textual_entailment": 0.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 49.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 28.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 37.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.1309, + "eval_rougeL_for_task220_rocstories_title_generation": 49.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 23.1538, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 34.5143, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 29.7663, + "eval_rougeL_for_task288_gigaword_title_generation": 23.7962, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.6667, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 62.8667, + "eval_rougeL_for_task329_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 45.819, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 74.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.6485, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 28.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 59.4032, + "eval_rougeL_for_task418_persent_title_generation": 17.1425, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.0571, + "eval_rougeL_for_task500_scruples_title_generation": 13.8534, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 29.5729, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 22.9688, + "eval_rougeL_for_task602_wikitext_title_generation": 6.6885, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 28.4992, + "eval_rougeL_for_task619_ohsumed_title_generation": 30.2478, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 36.8, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 49.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.281, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 13.2667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 77.1919, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.4926, + "eval_rougeL_for_task677_ollie_data_to_text": 18.391, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 21.9177, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 5.4299, + "eval_rougeL_for_task769_qed_title_generation": 73.8952, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 26.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 52.2, + "eval_rougeL_for_task892_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 43.0713, + "eval_rougeL_for_task970_sherliic_textual_entailment": 54.0, + "eval_rougeL_for_textual_entailment": 38.9861, + "eval_rougeL_for_title_generation": 25.2603, + "eval_rougeL_for_word_analogy": 18.625, + "eval_runtime": 492.2867, + "eval_samples_per_second": 24.193, + "eval_steps_per_second": 0.758, + "step": 1000 + }, + { + "epoch": 0.34, + "learning_rate": 5e-05, + "loss": 1.6196, + "step": 1500 + }, + { + "epoch": 0.34, + "eval_exact_match": 25.6087, + "eval_exact_match_for_answerability_classification": 48.6923, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 33.7857, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 36.1429, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 34.0, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 1.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 29.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 10.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 10.0, + "eval_exact_match_for_task1153_bard_word_analogy": 6.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 25.0, + "eval_exact_match_for_task1157_bard_word_analogy": 25.0, + "eval_exact_match_for_task1158_bard_word_analogy": 13.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 61.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 17.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 44.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 28.0, + "eval_exact_match_for_task219_rocstories_title_generation": 5.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 12.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 34.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 20.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 36.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 19.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 40.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 7.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 57.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 38.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.1667, + "eval_exact_match_for_title_generation": 6.7825, + "eval_exact_match_for_word_analogy": 19.75, + "eval_f1": 42.2127, + "eval_f1_for_answerability_classification": 51.2692, + "eval_f1_for_cause_effect_classification": 54.3911, + "eval_f1_for_coreference_resolution": 42.5748, + "eval_f1_for_data_to_text": 40.7598, + "eval_f1_for_dialogue_act_recognition": 39.7143, + "eval_f1_for_grammar_error_correction": 56.4765, + "eval_f1_for_keyword_tagging": 48.4226, + "eval_f1_for_overlap_extraction": 32.3769, + "eval_f1_for_question_rewriting": 70.6226, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 30.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.3866, + "eval_f1_for_task035_winogrande_question_rewriting": 86.585, + "eval_f1_for_task036_qasc_keyword_tagging": 53.0131, + "eval_f1_for_task039_qasc_overlap_extraction": 28.3205, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 50.4383, + "eval_f1_for_task1152_bard_word_analogy": 10.0, + "eval_f1_for_task1153_bard_word_analogy": 8.0, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 25.0, + "eval_f1_for_task1157_bard_word_analogy": 25.0, + "eval_f1_for_task1158_bard_word_analogy": 13.0, + "eval_f1_for_task1159_bard_word_analogy": 13.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.3968, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.5166, + "eval_f1_for_task121_zest_question_rewriting": 50.4138, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 6.6891, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.4081, + "eval_f1_for_task1356_xlsum_title_generation": 11.0268, + "eval_f1_for_task1358_xlsum_title_generation": 29.5539, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_f1_for_task1407_dart_data_to_text": 27.3214, + "eval_f1_for_task1409_dart_data_to_text": 41.5696, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.5071, + "eval_f1_for_task1439_doqa_answerability_classification": 61.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1540_peer_read_title_generation": 26.8911, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4458, + "eval_f1_for_task1562_zest_question_rewriting": 57.0947, + "eval_f1_for_task1586_scifact_title_generation": 27.2051, + "eval_f1_for_task1598_nyc_data_to_text": 38.5404, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.1703, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_f1_for_task1631_open_pi_data_to_text": 64.663, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_f1_for_task1659_billsum_title_generation": 27.7095, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.4762, + "eval_f1_for_task1728_web_nlg_data_to_text": 41.917, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 44.0, + "eval_f1_for_task202_multinli_textual_entailment": 28.0, + "eval_f1_for_task219_rocstories_title_generation": 17.6415, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 12.1667, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 45.7405, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 36.4333, + "eval_f1_for_task288_gigaword_title_generation": 24.0074, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 61.7333, + "eval_f1_for_task329_gap_coreference_resolution": 36.0, + "eval_f1_for_task330_gap_coreference_resolution": 48.0524, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 35.4768, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 25.5, + "eval_f1_for_task402_grailqa_question_rewriting": 77.2959, + "eval_f1_for_task418_persent_title_generation": 15.0927, + "eval_f1_for_task442_com_qa_question_rewriting": 69.1449, + "eval_f1_for_task500_scruples_title_generation": 13.1064, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.8802, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 20.5811, + "eval_f1_for_task602_wikitext_title_generation": 7.9039, + "eval_f1_for_task613_liar_keyword_tagging": 19.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 28.5941, + "eval_f1_for_task619_ohsumed_title_generation": 34.3663, + "eval_f1_for_task620_ohsumed_keyword_tagging": 35.5333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 40.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.9, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 10.6592, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.5314, + "eval_f1_for_task671_ambigqa_question_rewriting": 67.3015, + "eval_f1_for_task677_ollie_data_to_text": 20.2507, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 25.0602, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.3376, + "eval_f1_for_task769_qed_title_generation": 77.06, + "eval_f1_for_task827_copa_cause_effect_classification": 51.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 38.0, + "eval_f1_for_task891_gap_coreference_resolution": 49.719, + "eval_f1_for_task892_gap_coreference_resolution": 41.0, + "eval_f1_for_task893_gap_coreference_resolution": 37.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 50.8479, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.1667, + "eval_f1_for_title_generation": 26.5083, + "eval_f1_for_word_analogy": 20.0, + "eval_gen_len": 10.2558, + "eval_global_step": 1500, + "eval_loss": 1.4218910932540894, + "eval_rouge1": 43.8028, + "eval_rouge1_for_answerability_classification": 51.2682, + "eval_rouge1_for_cause_effect_classification": 54.996, + "eval_rouge1_for_coreference_resolution": 43.3754, + "eval_rouge1_for_data_to_text": 44.3267, + "eval_rouge1_for_dialogue_act_recognition": 41.8905, + "eval_rouge1_for_grammar_error_correction": 60.6307, + "eval_rouge1_for_keyword_tagging": 52.3249, + "eval_rouge1_for_overlap_extraction": 33.8135, + "eval_rouge1_for_question_rewriting": 72.1866, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 33.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.466, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.2271, + "eval_rouge1_for_task036_qasc_keyword_tagging": 54.9388, + "eval_rouge1_for_task039_qasc_overlap_extraction": 30.3205, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 62.1271, + "eval_rouge1_for_task1152_bard_word_analogy": 10.0, + "eval_rouge1_for_task1153_bard_word_analogy": 8.0, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 25.0, + "eval_rouge1_for_task1157_bard_word_analogy": 25.0, + "eval_rouge1_for_task1158_bard_word_analogy": 13.0, + "eval_rouge1_for_task1159_bard_word_analogy": 13.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.2787, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.7743, + "eval_rouge1_for_task121_zest_question_rewriting": 52.2451, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 6.6737, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.0805, + "eval_rouge1_for_task1356_xlsum_title_generation": 13.2341, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.3163, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 5.9, + "eval_rouge1_for_task1407_dart_data_to_text": 28.011, + "eval_rouge1_for_task1409_dart_data_to_text": 41.8937, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.4697, + "eval_rouge1_for_task1439_doqa_answerability_classification": 61.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 29.8595, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.7918, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.9765, + "eval_rouge1_for_task1586_scifact_title_generation": 30.4609, + "eval_rouge1_for_task1598_nyc_data_to_text": 41.2535, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.4483, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 65.3052, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1659_billsum_title_generation": 29.5188, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.4762, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 52.4866, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 44.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 28.0, + "eval_rouge1_for_task219_rocstories_title_generation": 22.7415, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 12.1538, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 46.4071, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 37.3065, + "eval_rouge1_for_task288_gigaword_title_generation": 26.7907, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 63.1333, + "eval_rouge1_for_task329_gap_coreference_resolution": 36.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 48.1524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 35.8817, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 30.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 79.5992, + "eval_rouge1_for_task418_persent_title_generation": 17.4937, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.8378, + "eval_rouge1_for_task500_scruples_title_generation": 14.8868, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.2508, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 23.498, + "eval_rouge1_for_task602_wikitext_title_generation": 8.2204, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 32.4235, + "eval_rouge1_for_task619_ohsumed_title_generation": 37.1371, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 38.1667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 40.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.1857, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 10.6344, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.4703, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 67.928, + "eval_rouge1_for_task677_ollie_data_to_text": 22.3823, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 26.4528, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.7089, + "eval_rouge1_for_task769_qed_title_generation": 77.0941, + "eval_rouge1_for_task827_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 38.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 49.9524, + "eval_rouge1_for_task892_gap_coreference_resolution": 41.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 37.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.4545, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.0278, + "eval_rouge1_for_title_generation": 28.5646, + "eval_rouge1_for_word_analogy": 20.0, + "eval_rougeL": 42.577, + "eval_rougeL_for_answerability_classification": 51.2682, + "eval_rougeL_for_cause_effect_classification": 54.2808, + "eval_rougeL_for_coreference_resolution": 43.3679, + "eval_rougeL_for_data_to_text": 37.5104, + "eval_rougeL_for_dialogue_act_recognition": 41.8905, + "eval_rougeL_for_grammar_error_correction": 59.5547, + "eval_rougeL_for_keyword_tagging": 51.751, + "eval_rougeL_for_overlap_extraction": 33.097, + "eval_rougeL_for_question_rewriting": 68.8215, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 33.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.466, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.4762, + "eval_rougeL_for_task036_qasc_keyword_tagging": 52.6528, + "eval_rougeL_for_task039_qasc_overlap_extraction": 30.3205, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.6482, + "eval_rougeL_for_task1152_bard_word_analogy": 10.0, + "eval_rougeL_for_task1153_bard_word_analogy": 8.0, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 25.0, + "eval_rougeL_for_task1157_bard_word_analogy": 25.0, + "eval_rougeL_for_task1158_bard_word_analogy": 13.0, + "eval_rougeL_for_task1159_bard_word_analogy": 13.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.4699, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.617, + "eval_rougeL_for_task121_zest_question_rewriting": 48.0074, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 6.3547, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.9835, + "eval_rougeL_for_task1356_xlsum_title_generation": 10.8112, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.3454, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 5.9, + "eval_rougeL_for_task1407_dart_data_to_text": 23.9643, + "eval_rougeL_for_task1409_dart_data_to_text": 36.225, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.2351, + "eval_rougeL_for_task1439_doqa_answerability_classification": 61.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 28.0475, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.8743, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.7459, + "eval_rougeL_for_task1586_scifact_title_generation": 25.4821, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.973, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.1515, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 54.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 56.1901, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1659_billsum_title_generation": 24.8817, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.4762, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 45.727, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 44.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 28.0, + "eval_rougeL_for_task219_rocstories_title_generation": 22.5193, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 12.1538, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 46.4071, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 35.8736, + "eval_rougeL_for_task288_gigaword_title_generation": 22.723, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 63.1333, + "eval_rougeL_for_task329_gap_coreference_resolution": 36.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 48.1524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 34.1452, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 30.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 66.6286, + "eval_rougeL_for_task418_persent_title_generation": 15.1946, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.0479, + "eval_rougeL_for_task500_scruples_title_generation": 13.3233, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 33.0064, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 22.4829, + "eval_rougeL_for_task602_wikitext_title_generation": 8.171, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 29.1534, + "eval_rougeL_for_task619_ohsumed_title_generation": 32.7195, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 37.5833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 40.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.1857, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 10.5292, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.7851, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.1273, + "eval_rougeL_for_task677_ollie_data_to_text": 18.437, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 23.1661, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.5334, + "eval_rougeL_for_task769_qed_title_generation": 77.0941, + "eval_rougeL_for_task827_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 56.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 38.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 49.9524, + "eval_rougeL_for_task892_gap_coreference_resolution": 41.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 37.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.4926, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.0278, + "eval_rougeL_for_title_generation": 26.2604, + "eval_rougeL_for_word_analogy": 20.0, + "eval_runtime": 434.913, + "eval_samples_per_second": 27.385, + "eval_steps_per_second": 0.858, + "step": 1500 + }, + { + "epoch": 0.46, + "learning_rate": 5e-05, + "loss": 1.6129, + "step": 2000 + }, + { + "epoch": 0.46, + "eval_exact_match": 25.9698, + "eval_exact_match_for_answerability_classification": 51.4615, + "eval_exact_match_for_cause_effect_classification": 35.4286, + "eval_exact_match_for_coreference_resolution": 35.0, + "eval_exact_match_for_data_to_text": 0.1211, + "eval_exact_match_for_dialogue_act_recognition": 35.4286, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 35.6, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 1.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 36.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 27.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 48.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 11.0, + "eval_exact_match_for_task1153_bard_word_analogy": 8.0, + "eval_exact_match_for_task1154_bard_word_analogy": 14.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 25.0, + "eval_exact_match_for_task1157_bard_word_analogy": 23.0, + "eval_exact_match_for_task1158_bard_word_analogy": 11.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 60.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 2.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 9.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 37.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 26.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 29.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 45.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 24.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 57.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 0.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 36.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 45.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 83.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 5.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 65.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 33.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 48.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.4583, + "eval_exact_match_for_title_generation": 7.5112, + "eval_exact_match_for_word_analogy": 19.375, + "eval_f1": 42.4307, + "eval_f1_for_answerability_classification": 54.0256, + "eval_f1_for_cause_effect_classification": 53.587, + "eval_f1_for_coreference_resolution": 43.5701, + "eval_f1_for_data_to_text": 40.7549, + "eval_f1_for_dialogue_act_recognition": 39.2143, + "eval_f1_for_grammar_error_correction": 57.3016, + "eval_f1_for_keyword_tagging": 49.2526, + "eval_f1_for_overlap_extraction": 32.5794, + "eval_f1_for_question_rewriting": 69.1834, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 36.5, + "eval_f1_for_task034_winogrande_question_rewriting": 91.6868, + "eval_f1_for_task035_winogrande_question_rewriting": 85.1365, + "eval_f1_for_task036_qasc_keyword_tagging": 63.9151, + "eval_f1_for_task039_qasc_overlap_extraction": 27.1667, + "eval_f1_for_task050_multirc_answerability_classification": 48.0, + "eval_f1_for_task102_commongen_data_to_text": 48.1858, + "eval_f1_for_task1152_bard_word_analogy": 11.0, + "eval_f1_for_task1153_bard_word_analogy": 9.3333, + "eval_f1_for_task1154_bard_word_analogy": 14.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 25.0, + "eval_f1_for_task1157_bard_word_analogy": 23.0, + "eval_f1_for_task1158_bard_word_analogy": 11.0, + "eval_f1_for_task1159_bard_word_analogy": 13.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.0088, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 78.9018, + "eval_f1_for_task121_zest_question_rewriting": 50.5929, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.208, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.3155, + "eval_f1_for_task1356_xlsum_title_generation": 10.8025, + "eval_f1_for_task1358_xlsum_title_generation": 29.8777, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_f1_for_task1407_dart_data_to_text": 25.8701, + "eval_f1_for_task1409_dart_data_to_text": 42.1878, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 31.1665, + "eval_f1_for_task1439_doqa_answerability_classification": 60.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 27.4472, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4368, + "eval_f1_for_task1562_zest_question_rewriting": 55.3364, + "eval_f1_for_task1586_scifact_title_generation": 28.9203, + "eval_f1_for_task1598_nyc_data_to_text": 37.2687, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 76.775, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_f1_for_task1631_open_pi_data_to_text": 64.283, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_f1_for_task1659_billsum_title_generation": 31.5626, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.8571, + "eval_f1_for_task1728_web_nlg_data_to_text": 44.966, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 30.0, + "eval_f1_for_task201_multinli_textual_entailment": 32.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 21.5842, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 53.081, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 37.9921, + "eval_f1_for_task288_gigaword_title_generation": 25.2857, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 60.0048, + "eval_f1_for_task329_gap_coreference_resolution": 29.0, + "eval_f1_for_task330_gap_coreference_resolution": 52.4857, + "eval_f1_for_task349_squad2.0_answerability_classification": 45.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.5076, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 30.5, + "eval_f1_for_task402_grailqa_question_rewriting": 75.3951, + "eval_f1_for_task418_persent_title_generation": 16.7934, + "eval_f1_for_task442_com_qa_question_rewriting": 65.4268, + "eval_f1_for_task500_scruples_title_generation": 13.0599, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.8133, + "eval_f1_for_task520_aquamuse_answerability_classification": 57.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 23.1275, + "eval_f1_for_task602_wikitext_title_generation": 11.4683, + "eval_f1_for_task613_liar_keyword_tagging": 18.5, + "eval_f1_for_task614_glucose_cause_effect_classification": 26.9346, + "eval_f1_for_task619_ohsumed_title_generation": 32.5135, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.4, + "eval_f1_for_task623_ohsumed_keyword_tagging": 36.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 45.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 90.4476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 6.3333, + "eval_f1_for_task670_ambigqa_question_rewriting": 76.9572, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.4939, + "eval_f1_for_task677_ollie_data_to_text": 20.9509, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 23.7623, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.8898, + "eval_f1_for_task769_qed_title_generation": 75.1595, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 33.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.219, + "eval_f1_for_task892_gap_coreference_resolution": 48.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 51.6516, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.4583, + "eval_f1_for_title_generation": 27.3856, + "eval_f1_for_word_analogy": 19.5417, + "eval_gen_len": 9.8153, + "eval_global_step": 2000, + "eval_loss": 1.4055153131484985, + "eval_rouge1": 44.0454, + "eval_rouge1_for_answerability_classification": 54.0256, + "eval_rouge1_for_cause_effect_classification": 54.0177, + "eval_rouge1_for_coreference_resolution": 44.5548, + "eval_rouge1_for_data_to_text": 43.5428, + "eval_rouge1_for_dialogue_act_recognition": 41.5619, + "eval_rouge1_for_grammar_error_correction": 60.6232, + "eval_rouge1_for_keyword_tagging": 54.71, + "eval_rouge1_for_overlap_extraction": 34.7248, + "eval_rouge1_for_question_rewriting": 70.6769, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 40.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 91.6595, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.7782, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.0689, + "eval_rouge1_for_task039_qasc_overlap_extraction": 30.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 48.0, + "eval_rouge1_for_task102_commongen_data_to_text": 58.6199, + "eval_rouge1_for_task1152_bard_word_analogy": 11.0, + "eval_rouge1_for_task1153_bard_word_analogy": 9.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 14.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 25.0, + "eval_rouge1_for_task1157_bard_word_analogy": 23.0, + "eval_rouge1_for_task1158_bard_word_analogy": 11.0, + "eval_rouge1_for_task1159_bard_word_analogy": 13.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.5811, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.2325, + "eval_rouge1_for_task121_zest_question_rewriting": 52.686, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.1999, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.912, + "eval_rouge1_for_task1356_xlsum_title_generation": 13.6353, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.3317, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 8.1, + "eval_rouge1_for_task1407_dart_data_to_text": 26.1869, + "eval_rouge1_for_task1409_dart_data_to_text": 42.3894, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.6463, + "eval_rouge1_for_task1439_doqa_answerability_classification": 60.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 29.3798, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.0535, + "eval_rouge1_for_task1586_scifact_title_generation": 32.6196, + "eval_rouge1_for_task1598_nyc_data_to_text": 39.671, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.215, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 64.8999, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1659_billsum_title_generation": 33.4664, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.8571, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 52.1259, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 26.1763, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 53.2476, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 38.7829, + "eval_rouge1_for_task288_gigaword_title_generation": 27.901, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 62.2905, + "eval_rouge1_for_task329_gap_coreference_resolution": 29.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 52.7524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 45.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.2122, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 37.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 77.3244, + "eval_rouge1_for_task418_persent_title_generation": 19.207, + "eval_rouge1_for_task442_com_qa_question_rewriting": 68.8425, + "eval_rouge1_for_task500_scruples_title_generation": 14.542, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.1509, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 57.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 25.6843, + "eval_rouge1_for_task602_wikitext_title_generation": 12.3326, + "eval_rouge1_for_task613_liar_keyword_tagging": 34.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 29.2453, + "eval_rouge1_for_task619_ohsumed_title_generation": 35.4813, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.5333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 36.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 45.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 91.9476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 6.3333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 77.6179, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 66.1246, + "eval_rouge1_for_task677_ollie_data_to_text": 22.6376, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 25.3354, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 5.4013, + "eval_rouge1_for_task769_qed_title_generation": 75.2413, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 33.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.4524, + "eval_rouge1_for_task892_gap_coreference_resolution": 48.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.7285, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 40.3194, + "eval_rouge1_for_title_generation": 29.4447, + "eval_rouge1_for_word_analogy": 19.5417, + "eval_rougeL": 42.8432, + "eval_rougeL_for_answerability_classification": 54.0256, + "eval_rougeL_for_cause_effect_classification": 53.2923, + "eval_rougeL_for_coreference_resolution": 44.5548, + "eval_rougeL_for_data_to_text": 37.0384, + "eval_rougeL_for_dialogue_act_recognition": 41.5619, + "eval_rougeL_for_grammar_error_correction": 59.2963, + "eval_rougeL_for_keyword_tagging": 54.2091, + "eval_rougeL_for_overlap_extraction": 34.0993, + "eval_rougeL_for_question_rewriting": 67.1993, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 40.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 91.6595, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.0764, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.5478, + "eval_rougeL_for_task039_qasc_overlap_extraction": 30.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 48.0, + "eval_rougeL_for_task102_commongen_data_to_text": 50.6992, + "eval_rougeL_for_task1152_bard_word_analogy": 11.0, + "eval_rougeL_for_task1153_bard_word_analogy": 9.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 14.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 25.0, + "eval_rougeL_for_task1157_bard_word_analogy": 23.0, + "eval_rougeL_for_task1158_bard_word_analogy": 11.0, + "eval_rougeL_for_task1159_bard_word_analogy": 13.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.9958, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.4212, + "eval_rougeL_for_task121_zest_question_rewriting": 46.6747, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 7.8809, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.0393, + "eval_rougeL_for_task1356_xlsum_title_generation": 11.431, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.7835, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 45.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 8.1, + "eval_rougeL_for_task1407_dart_data_to_text": 22.5153, + "eval_rougeL_for_task1409_dart_data_to_text": 37.1238, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 32.9101, + "eval_rougeL_for_task1439_doqa_answerability_classification": 60.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 28.0912, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6825, + "eval_rougeL_for_task1562_zest_question_rewriting": 49.8016, + "eval_rougeL_for_task1586_scifact_title_generation": 27.2271, + "eval_rougeL_for_task1598_nyc_data_to_text": 30.6672, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 75.8749, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 56.7591, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.5767, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.8571, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 45.9429, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 25.9541, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 53.2476, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 37.5319, + "eval_rougeL_for_task288_gigaword_title_generation": 23.6349, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 62.2905, + "eval_rougeL_for_task329_gap_coreference_resolution": 29.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 52.7524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 45.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 73.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.7495, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 37.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 65.3286, + "eval_rougeL_for_task418_persent_title_generation": 16.6057, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.2438, + "eval_rougeL_for_task500_scruples_title_generation": 13.3003, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.7554, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 57.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 25.189, + "eval_rougeL_for_task602_wikitext_title_generation": 12.1326, + "eval_rougeL_for_task613_liar_keyword_tagging": 34.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 25.63, + "eval_rougeL_for_task619_ohsumed_title_generation": 31.2406, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.55, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 36.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 45.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 91.9476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 6.3333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 76.1622, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.9104, + "eval_rougeL_for_task677_ollie_data_to_text": 18.6096, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 22.4864, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.833, + "eval_rougeL_for_task769_qed_title_generation": 75.2413, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 33.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.4524, + "eval_rougeL_for_task892_gap_coreference_resolution": 48.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.3632, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 40.3194, + "eval_rougeL_for_title_generation": 27.2189, + "eval_rougeL_for_word_analogy": 19.5417, + "eval_runtime": 390.4436, + "eval_samples_per_second": 30.504, + "eval_steps_per_second": 0.955, + "step": 2000 + }, + { + "epoch": 0.57, + "learning_rate": 5e-05, + "loss": 1.5916, + "step": 2500 + }, + { + "epoch": 0.57, + "eval_exact_match": 25.3904, + "eval_exact_match_for_answerability_classification": 50.2308, + "eval_exact_match_for_cause_effect_classification": 36.7143, + "eval_exact_match_for_coreference_resolution": 31.9286, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 35.2857, + "eval_exact_match_for_grammar_error_correction": 7.5, + "eval_exact_match_for_keyword_tagging": 38.2, + "eval_exact_match_for_overlap_extraction": 12.5, + "eval_exact_match_for_question_rewriting": 1.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 33.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 25.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 25.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 11.0, + "eval_exact_match_for_task1153_bard_word_analogy": 9.0, + "eval_exact_match_for_task1154_bard_word_analogy": 16.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 22.0, + "eval_exact_match_for_task1157_bard_word_analogy": 19.0, + "eval_exact_match_for_task1158_bard_word_analogy": 9.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 55.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 48.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 23.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 15.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 30.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 3.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 20.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 45.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 40.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 17.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 25.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 46.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 20.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 59.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 1.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 14.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 47.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 11.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 0.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 63.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 37.2917, + "eval_exact_match_for_title_generation": 7.1749, + "eval_exact_match_for_word_analogy": 18.75, + "eval_f1": 42.0282, + "eval_f1_for_answerability_classification": 52.7949, + "eval_f1_for_cause_effect_classification": 55.3444, + "eval_f1_for_coreference_resolution": 40.5299, + "eval_f1_for_data_to_text": 42.3767, + "eval_f1_for_dialogue_act_recognition": 38.8571, + "eval_f1_for_grammar_error_correction": 61.4953, + "eval_f1_for_keyword_tagging": 52.3439, + "eval_f1_for_overlap_extraction": 29.9544, + "eval_f1_for_question_rewriting": 69.6615, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 34.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.8374, + "eval_f1_for_task035_winogrande_question_rewriting": 86.5062, + "eval_f1_for_task036_qasc_keyword_tagging": 63.7386, + "eval_f1_for_task039_qasc_overlap_extraction": 30.0667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 50.5279, + "eval_f1_for_task1152_bard_word_analogy": 11.0, + "eval_f1_for_task1153_bard_word_analogy": 9.6667, + "eval_f1_for_task1154_bard_word_analogy": 16.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 22.0, + "eval_f1_for_task1157_bard_word_analogy": 19.0, + "eval_f1_for_task1158_bard_word_analogy": 9.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.2658, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.7655, + "eval_f1_for_task121_zest_question_rewriting": 51.1128, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 11.1047, + "eval_f1_for_task1344_rte_textual_entailment": 55.0, + "eval_f1_for_task1345_qqp_question_rewriting": 38.9313, + "eval_f1_for_task1356_xlsum_title_generation": 12.1622, + "eval_f1_for_task1358_xlsum_title_generation": 30.2085, + "eval_f1_for_task1385_anli_textual_entailment": 36.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 53.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 1.0, + "eval_f1_for_task1407_dart_data_to_text": 26.8364, + "eval_f1_for_task1409_dart_data_to_text": 43.0672, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 39.507, + "eval_f1_for_task1439_doqa_answerability_classification": 48.0, + "eval_f1_for_task1442_doqa_answerability_classification": 51.0, + "eval_f1_for_task1516_imppres_textual_entailment": 23.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 26.6653, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4836, + "eval_f1_for_task1562_zest_question_rewriting": 53.5979, + "eval_f1_for_task1586_scifact_title_generation": 25.5082, + "eval_f1_for_task1598_nyc_data_to_text": 37.1206, + "eval_f1_for_task1612_sick_textual_entailment": 30.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.2398, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 70.1679, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_f1_for_task1659_billsum_title_generation": 30.9788, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.4762, + "eval_f1_for_task1728_web_nlg_data_to_text": 47.9925, + "eval_f1_for_task190_snli_textual_entailment": 5.5, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 20.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 15.9087, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 45.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.4476, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 29.8422, + "eval_f1_for_task288_gigaword_title_generation": 24.317, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 52.2571, + "eval_f1_for_task329_gap_coreference_resolution": 25.0, + "eval_f1_for_task330_gap_coreference_resolution": 50.3524, + "eval_f1_for_task349_squad2.0_answerability_classification": 46.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.7615, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 25.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 74.6416, + "eval_f1_for_task418_persent_title_generation": 18.8692, + "eval_f1_for_task442_com_qa_question_rewriting": 66.1843, + "eval_f1_for_task500_scruples_title_generation": 10.8433, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.9194, + "eval_f1_for_task520_aquamuse_answerability_classification": 59.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 21.4649, + "eval_f1_for_task602_wikitext_title_generation": 10.8359, + "eval_f1_for_task613_liar_keyword_tagging": 15.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 28.9828, + "eval_f1_for_task619_ohsumed_title_generation": 33.6294, + "eval_f1_for_task620_ohsumed_keyword_tagging": 38.0333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 47.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.6143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 12.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.168, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.2917, + "eval_f1_for_task677_ollie_data_to_text": 21.1963, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 24.3636, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.7618, + "eval_f1_for_task769_qed_title_generation": 77.9855, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 35.0, + "eval_f1_for_task891_gap_coreference_resolution": 49.8857, + "eval_f1_for_task892_gap_coreference_resolution": 32.0, + "eval_f1_for_task893_gap_coreference_resolution": 27.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_f1_for_task957_e2e_data_to_text": 51.885, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 37.3958, + "eval_f1_for_title_generation": 27.0906, + "eval_f1_for_word_analogy": 18.8333, + "eval_gen_len": 10.1305, + "eval_global_step": 2500, + "eval_loss": 1.4324835538864136, + "eval_rouge1": 43.5742, + "eval_rouge1_for_answerability_classification": 52.7949, + "eval_rouge1_for_cause_effect_classification": 55.8125, + "eval_rouge1_for_coreference_resolution": 41.1716, + "eval_rouge1_for_data_to_text": 45.0315, + "eval_rouge1_for_dialogue_act_recognition": 40.9762, + "eval_rouge1_for_grammar_error_correction": 65.9784, + "eval_rouge1_for_keyword_tagging": 57.0719, + "eval_rouge1_for_overlap_extraction": 32.2555, + "eval_rouge1_for_question_rewriting": 71.1316, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 35.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.8102, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.0896, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.3783, + "eval_rouge1_for_task039_qasc_overlap_extraction": 33.7333, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 61.6103, + "eval_rouge1_for_task1152_bard_word_analogy": 11.0, + "eval_rouge1_for_task1153_bard_word_analogy": 9.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 16.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 22.0, + "eval_rouge1_for_task1157_bard_word_analogy": 19.0, + "eval_rouge1_for_task1158_bard_word_analogy": 9.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.2292, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.0997, + "eval_rouge1_for_task121_zest_question_rewriting": 52.6719, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.3559, + "eval_rouge1_for_task1344_rte_textual_entailment": 55.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 42.3221, + "eval_rouge1_for_task1356_xlsum_title_generation": 14.8534, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.5191, + "eval_rouge1_for_task1385_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.5, + "eval_rouge1_for_task1407_dart_data_to_text": 26.7248, + "eval_rouge1_for_task1409_dart_data_to_text": 43.2968, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 45.128, + "eval_rouge1_for_task1439_doqa_answerability_classification": 48.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 23.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 28.8837, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.8288, + "eval_rouge1_for_task1562_zest_question_rewriting": 56.3755, + "eval_rouge1_for_task1586_scifact_title_generation": 28.8174, + "eval_rouge1_for_task1598_nyc_data_to_text": 39.5288, + "eval_rouge1_for_task1612_sick_textual_entailment": 30.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 77.6719, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 71.0068, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rouge1_for_task1659_billsum_title_generation": 32.9475, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.4762, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 53.761, + "eval_rouge1_for_task190_snli_textual_entailment": 5.5, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 20.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 20.3698, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 45.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 55.1143, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 30.7777, + "eval_rouge1_for_task288_gigaword_title_generation": 27.2706, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 53.9071, + "eval_rouge1_for_task329_gap_coreference_resolution": 25.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 50.9524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 46.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 35.6024, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 29.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 76.3345, + "eval_rouge1_for_task418_persent_title_generation": 21.3602, + "eval_rouge1_for_task442_com_qa_question_rewriting": 69.9614, + "eval_rouge1_for_task500_scruples_title_generation": 12.385, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.0736, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 59.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 24.8141, + "eval_rouge1_for_task602_wikitext_title_generation": 11.8603, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 31.4185, + "eval_rouge1_for_task619_ohsumed_title_generation": 36.2846, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 44.5333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 47.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.1143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 12.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.1009, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 66.0102, + "eval_rouge1_for_task677_ollie_data_to_text": 22.9935, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 26.1711, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.9292, + "eval_rouge1_for_task769_qed_title_generation": 78.0672, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 50.119, + "eval_rouge1_for_task892_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.757, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 39.2569, + "eval_rouge1_for_title_generation": 29.1685, + "eval_rouge1_for_word_analogy": 18.8333, + "eval_rougeL": 42.345, + "eval_rougeL_for_answerability_classification": 52.7949, + "eval_rougeL_for_cause_effect_classification": 55.0702, + "eval_rougeL_for_coreference_resolution": 41.1716, + "eval_rougeL_for_data_to_text": 38.5831, + "eval_rougeL_for_dialogue_act_recognition": 40.9762, + "eval_rougeL_for_grammar_error_correction": 64.6916, + "eval_rougeL_for_keyword_tagging": 56.4363, + "eval_rougeL_for_overlap_extraction": 31.5331, + "eval_rougeL_for_question_rewriting": 67.4931, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 35.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.8102, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.684, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.5172, + "eval_rougeL_for_task039_qasc_overlap_extraction": 33.7333, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.231, + "eval_rougeL_for_task1152_bard_word_analogy": 11.0, + "eval_rougeL_for_task1153_bard_word_analogy": 9.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 16.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 22.0, + "eval_rougeL_for_task1157_bard_word_analogy": 19.0, + "eval_rougeL_for_task1158_bard_word_analogy": 9.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.1585, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.9979, + "eval_rougeL_for_task121_zest_question_rewriting": 46.6326, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.6704, + "eval_rougeL_for_task1344_rte_textual_entailment": 55.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 39.5147, + "eval_rougeL_for_task1356_xlsum_title_generation": 12.5821, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.6304, + "eval_rougeL_for_task1385_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 41.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.5, + "eval_rougeL_for_task1407_dart_data_to_text": 23.5671, + "eval_rougeL_for_task1409_dart_data_to_text": 37.4603, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 43.4719, + "eval_rougeL_for_task1439_doqa_answerability_classification": 48.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 23.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 27.2139, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.9113, + "eval_rougeL_for_task1562_zest_question_rewriting": 47.4142, + "eval_rougeL_for_task1586_scifact_title_generation": 23.6386, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.0414, + "eval_rougeL_for_task1612_sick_textual_entailment": 30.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.014, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 61.9755, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rougeL_for_task1659_billsum_title_generation": 27.8617, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 62.4762, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.8914, + "eval_rougeL_for_task190_snli_textual_entailment": 5.5, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 20.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 20.0142, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 45.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 55.1143, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 29.3328, + "eval_rougeL_for_task288_gigaword_title_generation": 23.3928, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 53.9071, + "eval_rougeL_for_task329_gap_coreference_resolution": 25.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 50.9524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 46.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.9928, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 29.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.5544, + "eval_rougeL_for_task418_persent_title_generation": 18.6908, + "eval_rougeL_for_task442_com_qa_question_rewriting": 65.978, + "eval_rougeL_for_task500_scruples_title_generation": 11.3696, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.6537, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 59.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 24.703, + "eval_rougeL_for_task602_wikitext_title_generation": 11.6874, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 27.8319, + "eval_rougeL_for_task619_ohsumed_title_generation": 32.3848, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 43.2167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 47.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.1143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 12.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.0387, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.7854, + "eval_rougeL_for_task677_ollie_data_to_text": 19.1526, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 22.7936, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.4574, + "eval_rougeL_for_task769_qed_title_generation": 78.0672, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 51.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 50.119, + "eval_rougeL_for_task892_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.2181, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 39.2569, + "eval_rougeL_for_title_generation": 26.8858, + "eval_rougeL_for_word_analogy": 18.8333, + "eval_runtime": 450.3934, + "eval_samples_per_second": 26.444, + "eval_steps_per_second": 0.828, + "step": 2500 + }, + { + "epoch": 0.69, + "learning_rate": 5e-05, + "loss": 1.5458, + "step": 3000 + }, + { + "epoch": 0.69, + "eval_exact_match": 25.8774, + "eval_exact_match_for_answerability_classification": 51.3077, + "eval_exact_match_for_cause_effect_classification": 35.7143, + "eval_exact_match_for_coreference_resolution": 33.9286, + "eval_exact_match_for_data_to_text": 0.0, + "eval_exact_match_for_dialogue_act_recognition": 32.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 40.0, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 1.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 36.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 32.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 44.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 11.0, + "eval_exact_match_for_task1153_bard_word_analogy": 5.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 52.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 19.0, + "eval_exact_match_for_task1158_bard_word_analogy": 10.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 51.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 25.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 23.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 58.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 1.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 31.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 12.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 0.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 28.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 51.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 45.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 17.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 42.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 55.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 17.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 38.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 21.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 69.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 40.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 13.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.5, + "eval_exact_match_for_title_generation": 7.3991, + "eval_exact_match_for_word_analogy": 18.625, + "eval_f1": 42.2174, + "eval_f1_for_answerability_classification": 53.8718, + "eval_f1_for_cause_effect_classification": 53.5714, + "eval_f1_for_coreference_resolution": 42.3257, + "eval_f1_for_data_to_text": 41.4995, + "eval_f1_for_dialogue_act_recognition": 36.2143, + "eval_f1_for_grammar_error_correction": 56.4668, + "eval_f1_for_keyword_tagging": 52.4977, + "eval_f1_for_overlap_extraction": 35.2569, + "eval_f1_for_question_rewriting": 69.0166, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 37.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.542, + "eval_f1_for_task035_winogrande_question_rewriting": 85.0587, + "eval_f1_for_task036_qasc_keyword_tagging": 67.4598, + "eval_f1_for_task039_qasc_overlap_extraction": 33.5667, + "eval_f1_for_task050_multirc_answerability_classification": 44.0, + "eval_f1_for_task102_commongen_data_to_text": 52.5291, + "eval_f1_for_task1152_bard_word_analogy": 11.0, + "eval_f1_for_task1153_bard_word_analogy": 5.6667, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 52.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 19.0, + "eval_f1_for_task1158_bard_word_analogy": 10.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.4783, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.6, + "eval_f1_for_task121_zest_question_rewriting": 49.3279, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 7.9441, + "eval_f1_for_task1344_rte_textual_entailment": 51.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.5731, + "eval_f1_for_task1356_xlsum_title_generation": 12.4848, + "eval_f1_for_task1358_xlsum_title_generation": 30.1553, + "eval_f1_for_task1385_anli_textual_entailment": 25.0, + "eval_f1_for_task1386_anli_textual_entailment": 36.0, + "eval_f1_for_task1387_anli_textual_entailment": 32.0, + "eval_f1_for_task1388_cb_textual_entailment": 23.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 24.3764, + "eval_f1_for_task1409_dart_data_to_text": 42.0102, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 29.6976, + "eval_f1_for_task1439_doqa_answerability_classification": 58.0, + "eval_f1_for_task1442_doqa_answerability_classification": 50.0, + "eval_f1_for_task1516_imppres_textual_entailment": 1.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 27.9108, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.236, + "eval_f1_for_task1562_zest_question_rewriting": 53.0852, + "eval_f1_for_task1586_scifact_title_generation": 28.2229, + "eval_f1_for_task1598_nyc_data_to_text": 38.5271, + "eval_f1_for_task1612_sick_textual_entailment": 31.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.9426, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_f1_for_task1631_open_pi_data_to_text": 68.1897, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_f1_for_task1659_billsum_title_generation": 28.6882, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 59.9333, + "eval_f1_for_task1728_web_nlg_data_to_text": 41.656, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 28.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 13.5594, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 57.131, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 36.9472, + "eval_f1_for_task288_gigaword_title_generation": 24.4277, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 50.3571, + "eval_f1_for_task329_gap_coreference_resolution": 30.0, + "eval_f1_for_task330_gap_coreference_resolution": 50.819, + "eval_f1_for_task349_squad2.0_answerability_classification": 42.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.1523, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 26.5, + "eval_f1_for_task402_grailqa_question_rewriting": 67.094, + "eval_f1_for_task418_persent_title_generation": 17.1119, + "eval_f1_for_task442_com_qa_question_rewriting": 66.7316, + "eval_f1_for_task500_scruples_title_generation": 13.4955, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 33.0378, + "eval_f1_for_task520_aquamuse_answerability_classification": 55.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 20.5021, + "eval_f1_for_task602_wikitext_title_generation": 8.1549, + "eval_f1_for_task613_liar_keyword_tagging": 15.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.1805, + "eval_f1_for_task619_ohsumed_title_generation": 35.2985, + "eval_f1_for_task620_ohsumed_keyword_tagging": 36.2238, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 38.0, + "eval_f1_for_task642_e_snli_textual_entailment": 37.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.8048, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 24.6667, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.0081, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.2196, + "eval_f1_for_task677_ollie_data_to_text": 24.1795, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 25.5816, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.8595, + "eval_f1_for_task769_qed_title_generation": 82.3807, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 50.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 40.0, + "eval_f1_for_task891_gap_coreference_resolution": 52.9857, + "eval_f1_for_task892_gap_coreference_resolution": 31.0, + "eval_f1_for_task893_gap_coreference_resolution": 13.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 50.3145, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.5, + "eval_f1_for_title_generation": 27.1934, + "eval_f1_for_word_analogy": 18.7083, + "eval_gen_len": 9.9123, + "eval_global_step": 3000, + "eval_loss": 1.425909161567688, + "eval_rouge1": 43.7938, + "eval_rouge1_for_answerability_classification": 53.8718, + "eval_rouge1_for_cause_effect_classification": 53.8803, + "eval_rouge1_for_coreference_resolution": 43.0364, + "eval_rouge1_for_data_to_text": 44.8319, + "eval_rouge1_for_dialogue_act_recognition": 38.2619, + "eval_rouge1_for_grammar_error_correction": 61.9493, + "eval_rouge1_for_keyword_tagging": 57.1101, + "eval_rouge1_for_overlap_extraction": 37.176, + "eval_rouge1_for_question_rewriting": 70.5232, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.5148, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.8639, + "eval_rouge1_for_task036_qasc_keyword_tagging": 69.955, + "eval_rouge1_for_task039_qasc_overlap_extraction": 36.5667, + "eval_rouge1_for_task050_multirc_answerability_classification": 44.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.4215, + "eval_rouge1_for_task1152_bard_word_analogy": 11.0, + "eval_rouge1_for_task1153_bard_word_analogy": 5.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 52.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 19.0, + "eval_rouge1_for_task1158_bard_word_analogy": 10.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.4802, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.8631, + "eval_rouge1_for_task121_zest_question_rewriting": 51.4096, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.0567, + "eval_rouge1_for_task1344_rte_textual_entailment": 51.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.1601, + "eval_rouge1_for_task1356_xlsum_title_generation": 14.8587, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.9372, + "eval_rouge1_for_task1385_anli_textual_entailment": 25.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 23.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_rouge1_for_task1407_dart_data_to_text": 24.5323, + "eval_rouge1_for_task1409_dart_data_to_text": 41.9367, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.3162, + "eval_rouge1_for_task1439_doqa_answerability_classification": 58.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 1.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 30.2306, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5824, + "eval_rouge1_for_task1562_zest_question_rewriting": 55.5605, + "eval_rouge1_for_task1586_scifact_title_generation": 31.2217, + "eval_rouge1_for_task1598_nyc_data_to_text": 40.5336, + "eval_rouge1_for_task1612_sick_textual_entailment": 31.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.2206, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 68.8828, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rouge1_for_task1659_billsum_title_generation": 30.6066, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 59.9333, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 53.3578, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 28.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 17.9475, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 57.2976, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 37.7853, + "eval_rouge1_for_task288_gigaword_title_generation": 27.2098, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 51.4738, + "eval_rouge1_for_task329_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 51.419, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 42.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.8293, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 31.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 69.2245, + "eval_rouge1_for_task418_persent_title_generation": 19.505, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.1902, + "eval_rouge1_for_task500_scruples_title_generation": 15.1791, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.3211, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 55.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 23.7845, + "eval_rouge1_for_task602_wikitext_title_generation": 8.7605, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 25.6663, + "eval_rouge1_for_task619_ohsumed_title_generation": 37.9052, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.2905, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 38.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.3048, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 24.6667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.877, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 68.8715, + "eval_rouge1_for_task677_ollie_data_to_text": 26.2956, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 27.0728, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.0229, + "eval_rouge1_for_task769_qed_title_generation": 82.4625, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 40.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.219, + "eval_rouge1_for_task892_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 13.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.3053, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 40.3056, + "eval_rouge1_for_title_generation": 29.2118, + "eval_rouge1_for_word_analogy": 18.7083, + "eval_rougeL": 42.6169, + "eval_rougeL_for_answerability_classification": 53.8718, + "eval_rougeL_for_cause_effect_classification": 53.1771, + "eval_rougeL_for_coreference_resolution": 43.0364, + "eval_rougeL_for_data_to_text": 38.4801, + "eval_rougeL_for_dialogue_act_recognition": 38.2619, + "eval_rougeL_for_grammar_error_correction": 60.6783, + "eval_rougeL_for_keyword_tagging": 56.6631, + "eval_rougeL_for_overlap_extraction": 36.5774, + "eval_rougeL_for_question_rewriting": 67.0961, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.5148, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.2453, + "eval_rougeL_for_task036_qasc_keyword_tagging": 68.6372, + "eval_rougeL_for_task039_qasc_overlap_extraction": 36.5667, + "eval_rougeL_for_task050_multirc_answerability_classification": 44.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.0787, + "eval_rougeL_for_task1152_bard_word_analogy": 11.0, + "eval_rougeL_for_task1153_bard_word_analogy": 5.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 52.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 19.0, + "eval_rougeL_for_task1158_bard_word_analogy": 10.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.6713, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.8939, + "eval_rougeL_for_task121_zest_question_rewriting": 44.9473, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 7.7785, + "eval_rougeL_for_task1344_rte_textual_entailment": 51.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.4031, + "eval_rougeL_for_task1356_xlsum_title_generation": 12.6211, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.6937, + "eval_rougeL_for_task1385_anli_textual_entailment": 25.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 23.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 53.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_rougeL_for_task1407_dart_data_to_text": 21.4197, + "eval_rougeL_for_task1409_dart_data_to_text": 35.6482, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 35.6904, + "eval_rougeL_for_task1439_doqa_answerability_classification": 58.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 1.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 28.5733, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6662, + "eval_rougeL_for_task1562_zest_question_rewriting": 46.8388, + "eval_rougeL_for_task1586_scifact_title_generation": 26.4879, + "eval_rougeL_for_task1598_nyc_data_to_text": 33.6763, + "eval_rougeL_for_task1612_sick_textual_entailment": 31.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 76.8043, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 59.7015, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 58.0, + "eval_rougeL_for_task1659_billsum_title_generation": 25.8087, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 59.9333, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.0201, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 28.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 17.9475, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 57.2976, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 36.5881, + "eval_rougeL_for_task288_gigaword_title_generation": 23.2884, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 51.4738, + "eval_rougeL_for_task329_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 51.419, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 42.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.75, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 31.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 58.0355, + "eval_rougeL_for_task418_persent_title_generation": 16.8424, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.1272, + "eval_rougeL_for_task500_scruples_title_generation": 14.2204, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.8715, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 55.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 23.186, + "eval_rougeL_for_task602_wikitext_title_generation": 8.7114, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.8229, + "eval_rougeL_for_task619_ohsumed_title_generation": 34.12, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 40.3738, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 38.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.3048, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 24.6667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.0375, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.2091, + "eval_rougeL_for_task677_ollie_data_to_text": 21.7271, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 23.1381, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.6926, + "eval_rougeL_for_task769_qed_title_generation": 82.4625, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 30.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 40.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.219, + "eval_rougeL_for_task892_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 13.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.6136, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 40.3056, + "eval_rougeL_for_title_generation": 27.0196, + "eval_rougeL_for_word_analogy": 18.7083, + "eval_runtime": 364.9586, + "eval_samples_per_second": 32.634, + "eval_steps_per_second": 1.022, + "step": 3000 + }, + { + "epoch": 0.8, + "learning_rate": 5e-05, + "loss": 1.4984, + "step": 3500 + }, + { + "epoch": 0.8, + "eval_exact_match": 26.1545, + "eval_exact_match_for_answerability_classification": 50.0, + "eval_exact_match_for_cause_effect_classification": 36.8571, + "eval_exact_match_for_coreference_resolution": 34.8571, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 34.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 37.0, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 49.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 41.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 20.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 44.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 13.0, + "eval_exact_match_for_task1153_bard_word_analogy": 9.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 50.0, + "eval_exact_match_for_task1156_bard_word_analogy": 24.0, + "eval_exact_match_for_task1157_bard_word_analogy": 25.0, + "eval_exact_match_for_task1158_bard_word_analogy": 12.0, + "eval_exact_match_for_task1159_bard_word_analogy": 16.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 56.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 22.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 16.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 35.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 29.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 44.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 16.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 43.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 8.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 53.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 1.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 16.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 39.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 85.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 20.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 49.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 63.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 55.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 35.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.4583, + "eval_exact_match_for_title_generation": 7.1188, + "eval_exact_match_for_word_analogy": 20.5, + "eval_f1": 42.9075, + "eval_f1_for_answerability_classification": 52.5641, + "eval_f1_for_cause_effect_classification": 54.9142, + "eval_f1_for_coreference_resolution": 43.6011, + "eval_f1_for_data_to_text": 42.4567, + "eval_f1_for_dialogue_act_recognition": 38.2857, + "eval_f1_for_grammar_error_correction": 58.9537, + "eval_f1_for_keyword_tagging": 50.9082, + "eval_f1_for_overlap_extraction": 27.4812, + "eval_f1_for_question_rewriting": 70.3706, + "eval_f1_for_task020_mctaco_answerability_classification": 49.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_f1_for_task034_winogrande_question_rewriting": 92.8543, + "eval_f1_for_task035_winogrande_question_rewriting": 86.058, + "eval_f1_for_task036_qasc_keyword_tagging": 61.0362, + "eval_f1_for_task039_qasc_overlap_extraction": 28.0667, + "eval_f1_for_task050_multirc_answerability_classification": 44.0, + "eval_f1_for_task102_commongen_data_to_text": 52.9072, + "eval_f1_for_task1152_bard_word_analogy": 13.0, + "eval_f1_for_task1153_bard_word_analogy": 13.0, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 50.0, + "eval_f1_for_task1156_bard_word_analogy": 24.0, + "eval_f1_for_task1157_bard_word_analogy": 25.0, + "eval_f1_for_task1158_bard_word_analogy": 12.0, + "eval_f1_for_task1159_bard_word_analogy": 16.0, + "eval_f1_for_task1161_coda_19_title_generation": 26.1138, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.1333, + "eval_f1_for_task121_zest_question_rewriting": 49.0018, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 11.1485, + "eval_f1_for_task1344_rte_textual_entailment": 56.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.7203, + "eval_f1_for_task1356_xlsum_title_generation": 13.6538, + "eval_f1_for_task1358_xlsum_title_generation": 29.2439, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 22.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_f1_for_task1407_dart_data_to_text": 29.1441, + "eval_f1_for_task1409_dart_data_to_text": 44.4467, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 34.6769, + "eval_f1_for_task1439_doqa_answerability_classification": 53.0, + "eval_f1_for_task1442_doqa_answerability_classification": 55.0, + "eval_f1_for_task1516_imppres_textual_entailment": 16.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 26.5713, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2305, + "eval_f1_for_task1562_zest_question_rewriting": 54.7056, + "eval_f1_for_task1586_scifact_title_generation": 28.599, + "eval_f1_for_task1598_nyc_data_to_text": 38.7099, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.2348, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 67.0183, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_f1_for_task1659_billsum_title_generation": 31.7206, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.019, + "eval_f1_for_task1728_web_nlg_data_to_text": 43.17, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 35.0, + "eval_f1_for_task201_multinli_textual_entailment": 35.0, + "eval_f1_for_task202_multinli_textual_entailment": 29.0, + "eval_f1_for_task219_rocstories_title_generation": 14.9586, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 56.1143, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 26.8957, + "eval_f1_for_task288_gigaword_title_generation": 24.3783, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 48.7, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 50.3667, + "eval_f1_for_task349_squad2.0_answerability_classification": 43.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.6819, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 32.0, + "eval_f1_for_task402_grailqa_question_rewriting": 73.5725, + "eval_f1_for_task418_persent_title_generation": 18.9328, + "eval_f1_for_task442_com_qa_question_rewriting": 69.5857, + "eval_f1_for_task500_scruples_title_generation": 12.76, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 34.7089, + "eval_f1_for_task520_aquamuse_answerability_classification": 53.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 22.5862, + "eval_f1_for_task602_wikitext_title_generation": 9.8736, + "eval_f1_for_task613_liar_keyword_tagging": 17.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 26.051, + "eval_f1_for_task619_ohsumed_title_generation": 35.9489, + "eval_f1_for_task620_ohsumed_keyword_tagging": 32.2, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 39.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.9714, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 26.497, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.1966, + "eval_f1_for_task671_ambigqa_question_rewriting": 69.0133, + "eval_f1_for_task677_ollie_data_to_text": 23.953, + "eval_f1_for_task738_perspectrum_textual_entailment": 49.0, + "eval_f1_for_task743_eurlex_title_generation": 26.1297, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.2692, + "eval_f1_for_task769_qed_title_generation": 81.207, + "eval_f1_for_task827_copa_cause_effect_classification": 53.0, + "eval_f1_for_task828_copa_cause_effect_classification": 55.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 35.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.719, + "eval_f1_for_task892_gap_coreference_resolution": 27.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 50.493, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.4583, + "eval_f1_for_title_generation": 27.8562, + "eval_f1_for_word_analogy": 21.0, + "eval_gen_len": 9.9032, + "eval_global_step": 3500, + "eval_loss": 1.4488416910171509, + "eval_rouge1": 44.5077, + "eval_rouge1_for_answerability_classification": 52.5641, + "eval_rouge1_for_cause_effect_classification": 55.2393, + "eval_rouge1_for_coreference_resolution": 44.375, + "eval_rouge1_for_data_to_text": 45.9609, + "eval_rouge1_for_dialogue_act_recognition": 40.4476, + "eval_rouge1_for_grammar_error_correction": 62.8543, + "eval_rouge1_for_keyword_tagging": 56.1517, + "eval_rouge1_for_overlap_extraction": 28.984, + "eval_rouge1_for_question_rewriting": 71.8439, + "eval_rouge1_for_task020_mctaco_answerability_classification": 49.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 44.5, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.827, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.8006, + "eval_rouge1_for_task036_qasc_keyword_tagging": 63.9203, + "eval_rouge1_for_task039_qasc_overlap_extraction": 30.4, + "eval_rouge1_for_task050_multirc_answerability_classification": 44.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.1569, + "eval_rouge1_for_task1152_bard_word_analogy": 13.0, + "eval_rouge1_for_task1153_bard_word_analogy": 13.0, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 50.0, + "eval_rouge1_for_task1156_bard_word_analogy": 24.0, + "eval_rouge1_for_task1157_bard_word_analogy": 25.0, + "eval_rouge1_for_task1158_bard_word_analogy": 12.0, + "eval_rouge1_for_task1159_bard_word_analogy": 16.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 28.8973, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.391, + "eval_rouge1_for_task121_zest_question_rewriting": 51.0218, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.3392, + "eval_rouge1_for_task1344_rte_textual_entailment": 56.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.3168, + "eval_rouge1_for_task1356_xlsum_title_generation": 16.0767, + "eval_rouge1_for_task1358_xlsum_title_generation": 32.9431, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 22.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.8, + "eval_rouge1_for_task1407_dart_data_to_text": 29.3923, + "eval_rouge1_for_task1409_dart_data_to_text": 44.5394, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 39.1823, + "eval_rouge1_for_task1439_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 16.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 28.5923, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5263, + "eval_rouge1_for_task1562_zest_question_rewriting": 57.6423, + "eval_rouge1_for_task1586_scifact_title_generation": 32.2347, + "eval_rouge1_for_task1598_nyc_data_to_text": 40.9944, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.5128, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 67.7491, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1659_billsum_title_generation": 33.6707, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.019, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.6214, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 35.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 29.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.5111, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 56.281, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 27.568, + "eval_rouge1_for_task288_gigaword_title_generation": 27.2845, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 49.9667, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 50.8, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 43.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.3084, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 38.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.3741, + "eval_rouge1_for_task418_persent_title_generation": 21.448, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.7691, + "eval_rouge1_for_task500_scruples_title_generation": 14.7406, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 35.1052, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 53.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 26.0421, + "eval_rouge1_for_task602_wikitext_title_generation": 10.5461, + "eval_rouge1_for_task613_liar_keyword_tagging": 32.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 27.6999, + "eval_rouge1_for_task619_ohsumed_title_generation": 38.5659, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.7, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 39.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4714, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 26.397, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.8899, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.7373, + "eval_rouge1_for_task677_ollie_data_to_text": 25.6835, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 49.0, + "eval_rouge1_for_task743_eurlex_title_generation": 28.2552, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.4357, + "eval_rouge1_for_task769_qed_title_generation": 81.2166, + "eval_rouge1_for_task827_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 55.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 35.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.9524, + "eval_rouge1_for_task892_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 50.6071, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.2639, + "eval_rouge1_for_title_generation": 29.9205, + "eval_rouge1_for_word_analogy": 21.0, + "eval_rougeL": 43.223, + "eval_rougeL_for_answerability_classification": 52.5641, + "eval_rougeL_for_cause_effect_classification": 54.481, + "eval_rougeL_for_coreference_resolution": 44.375, + "eval_rougeL_for_data_to_text": 38.2593, + "eval_rougeL_for_dialogue_act_recognition": 40.4476, + "eval_rougeL_for_grammar_error_correction": 61.6843, + "eval_rougeL_for_keyword_tagging": 55.5031, + "eval_rougeL_for_overlap_extraction": 27.9165, + "eval_rougeL_for_question_rewriting": 68.414, + "eval_rougeL_for_task020_mctaco_answerability_classification": 49.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 44.5, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.827, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.4756, + "eval_rougeL_for_task036_qasc_keyword_tagging": 61.9943, + "eval_rougeL_for_task039_qasc_overlap_extraction": 30.4, + "eval_rougeL_for_task050_multirc_answerability_classification": 44.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.5031, + "eval_rougeL_for_task1152_bard_word_analogy": 13.0, + "eval_rougeL_for_task1153_bard_word_analogy": 13.0, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 50.0, + "eval_rougeL_for_task1156_bard_word_analogy": 24.0, + "eval_rougeL_for_task1157_bard_word_analogy": 25.0, + "eval_rougeL_for_task1158_bard_word_analogy": 12.0, + "eval_rougeL_for_task1159_bard_word_analogy": 16.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.115, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.2892, + "eval_rougeL_for_task121_zest_question_rewriting": 44.0978, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.7508, + "eval_rougeL_for_task1344_rte_textual_entailment": 56.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.5013, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.6067, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.4692, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 22.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.8, + "eval_rougeL_for_task1407_dart_data_to_text": 24.8814, + "eval_rougeL_for_task1409_dart_data_to_text": 37.7923, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.7584, + "eval_rougeL_for_task1439_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 16.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 27.0153, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6101, + "eval_rougeL_for_task1562_zest_question_rewriting": 49.7596, + "eval_rougeL_for_task1586_scifact_title_generation": 27.2019, + "eval_rougeL_for_task1598_nyc_data_to_text": 30.5063, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.2138, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 56.5643, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.2314, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.019, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 48.8538, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 35.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 29.0, + "eval_rougeL_for_task219_rocstories_title_generation": 18.1277, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 56.281, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 25.4329, + "eval_rougeL_for_task288_gigaword_title_generation": 23.4692, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 49.9667, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 50.8, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 43.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.5901, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 38.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 63.7394, + "eval_rougeL_for_task418_persent_title_generation": 18.6298, + "eval_rougeL_for_task442_com_qa_question_rewriting": 68.5384, + "eval_rougeL_for_task500_scruples_title_generation": 14.0429, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 34.6711, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 53.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 25.8199, + "eval_rougeL_for_task602_wikitext_title_generation": 10.3965, + "eval_rougeL_for_task613_liar_keyword_tagging": 32.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 24.1103, + "eval_rougeL_for_task619_ohsumed_title_generation": 34.9856, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 38.3833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 39.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.4714, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 26.397, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.0435, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 69.0688, + "eval_rougeL_for_task677_ollie_data_to_text": 20.3132, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 49.0, + "eval_rougeL_for_task743_eurlex_title_generation": 24.063, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.1822, + "eval_rougeL_for_task769_qed_title_generation": 81.2166, + "eval_rougeL_for_task827_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 55.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 35.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.9524, + "eval_rougeL_for_task892_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.7802, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.2639, + "eval_rougeL_for_title_generation": 27.755, + "eval_rougeL_for_word_analogy": 21.0, + "eval_runtime": 432.0256, + "eval_samples_per_second": 27.568, + "eval_steps_per_second": 0.863, + "step": 3500 + }, + { + "epoch": 0.91, + "learning_rate": 5e-05, + "loss": 1.4883, + "step": 4000 + }, + { + "epoch": 0.91, + "eval_exact_match": 25.8606, + "eval_exact_match_for_answerability_classification": 49.6154, + "eval_exact_match_for_cause_effect_classification": 36.5714, + "eval_exact_match_for_coreference_resolution": 34.5714, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 33.7143, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 38.4, + "eval_exact_match_for_overlap_extraction": 15.0, + "eval_exact_match_for_question_rewriting": 1.2727, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 37.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 25.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 30.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 43.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 12.0, + "eval_exact_match_for_task1153_bard_word_analogy": 10.0, + "eval_exact_match_for_task1154_bard_word_analogy": 13.0, + "eval_exact_match_for_task1155_bard_word_analogy": 52.0, + "eval_exact_match_for_task1156_bard_word_analogy": 24.0, + "eval_exact_match_for_task1157_bard_word_analogy": 17.0, + "eval_exact_match_for_task1158_bard_word_analogy": 10.0, + "eval_exact_match_for_task1159_bard_word_analogy": 12.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 20.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 59.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 0.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 36.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 12.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 4.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 43.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 28.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 47.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 32.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 56.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 42.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 9.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 38.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 26.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 57.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 14.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 7.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 59.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 56.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 36.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 50.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 57.0, + "eval_exact_match_for_textual_entailment": 38.9583, + "eval_exact_match_for_title_generation": 6.7265, + "eval_exact_match_for_word_analogy": 18.75, + "eval_f1": 42.3429, + "eval_f1_for_answerability_classification": 52.1795, + "eval_f1_for_cause_effect_classification": 54.475, + "eval_f1_for_coreference_resolution": 43.1431, + "eval_f1_for_data_to_text": 43.2829, + "eval_f1_for_dialogue_act_recognition": 37.2857, + "eval_f1_for_grammar_error_correction": 58.1955, + "eval_f1_for_keyword_tagging": 51.5086, + "eval_f1_for_overlap_extraction": 34.0308, + "eval_f1_for_question_rewriting": 69.0804, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.5403, + "eval_f1_for_task035_winogrande_question_rewriting": 85.5033, + "eval_f1_for_task036_qasc_keyword_tagging": 62.9527, + "eval_f1_for_task039_qasc_overlap_extraction": 34.6667, + "eval_f1_for_task050_multirc_answerability_classification": 43.0, + "eval_f1_for_task102_commongen_data_to_text": 53.0366, + "eval_f1_for_task1152_bard_word_analogy": 12.0, + "eval_f1_for_task1153_bard_word_analogy": 15.3333, + "eval_f1_for_task1154_bard_word_analogy": 13.0, + "eval_f1_for_task1155_bard_word_analogy": 52.0, + "eval_f1_for_task1156_bard_word_analogy": 24.0, + "eval_f1_for_task1157_bard_word_analogy": 17.0, + "eval_f1_for_task1158_bard_word_analogy": 10.0, + "eval_f1_for_task1159_bard_word_analogy": 12.0, + "eval_f1_for_task1161_coda_19_title_generation": 26.4178, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.8996, + "eval_f1_for_task121_zest_question_rewriting": 48.4124, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.3357, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.2696, + "eval_f1_for_task1356_xlsum_title_generation": 14.5209, + "eval_f1_for_task1358_xlsum_title_generation": 30.3399, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 20.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 27.3589, + "eval_f1_for_task1409_dart_data_to_text": 45.1412, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 33.0663, + "eval_f1_for_task1439_doqa_answerability_classification": 59.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 0.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 28.8824, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.3247, + "eval_f1_for_task1562_zest_question_rewriting": 54.3571, + "eval_f1_for_task1586_scifact_title_generation": 28.4956, + "eval_f1_for_task1598_nyc_data_to_text": 39.2057, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 36.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 77.7871, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_f1_for_task1631_open_pi_data_to_text": 70.3455, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_f1_for_task1659_billsum_title_generation": 27.116, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 61.381, + "eval_f1_for_task1728_web_nlg_data_to_text": 47.6372, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 43.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 28.0, + "eval_f1_for_task219_rocstories_title_generation": 14.4477, + "eval_f1_for_task220_rocstories_title_generation": 47.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 32.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 59.3476, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.395, + "eval_f1_for_task288_gigaword_title_generation": 24.2066, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 36.3556, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 52.5333, + "eval_f1_for_task349_squad2.0_answerability_classification": 38.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.4356, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 30.5, + "eval_f1_for_task402_grailqa_question_rewriting": 69.6722, + "eval_f1_for_task418_persent_title_generation": 17.556, + "eval_f1_for_task442_com_qa_question_rewriting": 66.6522, + "eval_f1_for_task500_scruples_title_generation": 11.8795, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 30.1135, + "eval_f1_for_task520_aquamuse_answerability_classification": 57.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 19.3267, + "eval_f1_for_task602_wikitext_title_generation": 8.4252, + "eval_f1_for_task613_liar_keyword_tagging": 14.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 25.2227, + "eval_f1_for_task619_ohsumed_title_generation": 33.5448, + "eval_f1_for_task620_ohsumed_keyword_tagging": 34.619, + "eval_f1_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.3048, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 11.8333, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.8861, + "eval_f1_for_task671_ambigqa_question_rewriting": 64.9043, + "eval_f1_for_task677_ollie_data_to_text": 21.7961, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 24.5084, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.2779, + "eval_f1_for_task769_qed_title_generation": 74.8781, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 56.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 36.0, + "eval_f1_for_task891_gap_coreference_resolution": 58.219, + "eval_f1_for_task892_gap_coreference_resolution": 35.0, + "eval_f1_for_task893_gap_coreference_resolution": 31.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task957_e2e_data_to_text": 51.8832, + "eval_f1_for_task970_sherliic_textual_entailment": 57.0, + "eval_f1_for_textual_entailment": 38.9583, + "eval_f1_for_title_generation": 26.2694, + "eval_f1_for_word_analogy": 19.4167, + "eval_gen_len": 10.3652, + "eval_global_step": 4000, + "eval_loss": 1.4239813089370728, + "eval_rouge1": 43.8956, + "eval_rouge1_for_answerability_classification": 52.1795, + "eval_rouge1_for_cause_effect_classification": 54.9016, + "eval_rouge1_for_coreference_resolution": 43.6883, + "eval_rouge1_for_data_to_text": 46.4686, + "eval_rouge1_for_dialogue_act_recognition": 39.1905, + "eval_rouge1_for_grammar_error_correction": 63.782, + "eval_rouge1_for_keyword_tagging": 56.6126, + "eval_rouge1_for_overlap_extraction": 36.3524, + "eval_rouge1_for_question_rewriting": 70.7297, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.513, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.2614, + "eval_rouge1_for_task036_qasc_keyword_tagging": 65.8414, + "eval_rouge1_for_task039_qasc_overlap_extraction": 38.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 43.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.7052, + "eval_rouge1_for_task1152_bard_word_analogy": 12.0, + "eval_rouge1_for_task1153_bard_word_analogy": 15.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 13.0, + "eval_rouge1_for_task1155_bard_word_analogy": 52.0, + "eval_rouge1_for_task1156_bard_word_analogy": 24.0, + "eval_rouge1_for_task1157_bard_word_analogy": 17.0, + "eval_rouge1_for_task1158_bard_word_analogy": 10.0, + "eval_rouge1_for_task1159_bard_word_analogy": 12.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 29.8094, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.1573, + "eval_rouge1_for_task121_zest_question_rewriting": 51.1534, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 8.6053, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.8275, + "eval_rouge1_for_task1356_xlsum_title_generation": 16.1, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.4496, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 20.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_rouge1_for_task1407_dart_data_to_text": 28.5772, + "eval_rouge1_for_task1409_dart_data_to_text": 45.4306, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.9453, + "eval_rouge1_for_task1439_doqa_answerability_classification": 59.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 0.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 31.3246, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6186, + "eval_rouge1_for_task1562_zest_question_rewriting": 57.3046, + "eval_rouge1_for_task1586_scifact_title_generation": 30.9256, + "eval_rouge1_for_task1598_nyc_data_to_text": 41.877, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.4349, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 71.3893, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rouge1_for_task1659_billsum_title_generation": 29.0251, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 61.381, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 54.483, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 43.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 28.0, + "eval_rouge1_for_task219_rocstories_title_generation": 17.0087, + "eval_rouge1_for_task220_rocstories_title_generation": 47.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 32.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.5143, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.2048, + "eval_rouge1_for_task288_gigaword_title_generation": 27.0882, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 36.9222, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 52.9667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 38.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.8936, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 33.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 71.8667, + "eval_rouge1_for_task418_persent_title_generation": 20.1949, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.3773, + "eval_rouge1_for_task500_scruples_title_generation": 13.5807, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 30.4049, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 57.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 22.0535, + "eval_rouge1_for_task602_wikitext_title_generation": 8.5958, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 27.751, + "eval_rouge1_for_task619_ohsumed_title_generation": 35.9508, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.75, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.8048, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 12.2333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.7104, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.4199, + "eval_rouge1_for_task677_ollie_data_to_text": 24.0012, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 26.4607, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.5254, + "eval_rouge1_for_task769_qed_title_generation": 74.9599, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 56.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 36.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 58.4524, + "eval_rouge1_for_task892_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task957_e2e_data_to_text": 52.1905, + "eval_rouge1_for_task970_sherliic_textual_entailment": 57.0, + "eval_rouge1_for_textual_entailment": 40.7361, + "eval_rouge1_for_title_generation": 28.0921, + "eval_rouge1_for_word_analogy": 19.4167, + "eval_rougeL": 42.631, + "eval_rougeL_for_answerability_classification": 52.1795, + "eval_rougeL_for_cause_effect_classification": 54.0446, + "eval_rougeL_for_coreference_resolution": 43.6883, + "eval_rougeL_for_data_to_text": 39.2725, + "eval_rougeL_for_dialogue_act_recognition": 39.1905, + "eval_rougeL_for_grammar_error_correction": 62.6736, + "eval_rougeL_for_keyword_tagging": 56.0008, + "eval_rougeL_for_overlap_extraction": 34.9176, + "eval_rougeL_for_question_rewriting": 67.3611, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.513, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.8714, + "eval_rougeL_for_task036_qasc_keyword_tagging": 63.866, + "eval_rougeL_for_task039_qasc_overlap_extraction": 38.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 43.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.4622, + "eval_rougeL_for_task1152_bard_word_analogy": 12.0, + "eval_rougeL_for_task1153_bard_word_analogy": 15.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 13.0, + "eval_rougeL_for_task1155_bard_word_analogy": 52.0, + "eval_rougeL_for_task1156_bard_word_analogy": 24.0, + "eval_rougeL_for_task1157_bard_word_analogy": 17.0, + "eval_rougeL_for_task1158_bard_word_analogy": 10.0, + "eval_rougeL_for_task1159_bard_word_analogy": 12.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.2251, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0972, + "eval_rougeL_for_task121_zest_question_rewriting": 44.799, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.1085, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.0548, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.829, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.1894, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 20.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_rougeL_for_task1407_dart_data_to_text": 24.856, + "eval_rougeL_for_task1409_dart_data_to_text": 39.1301, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 39.6462, + "eval_rougeL_for_task1439_doqa_answerability_classification": 59.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 0.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 30.0073, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7011, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.8702, + "eval_rougeL_for_task1586_scifact_title_generation": 25.7281, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.5024, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.1381, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 61.8384, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 53.0, + "eval_rougeL_for_task1659_billsum_title_generation": 23.9586, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.381, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.2438, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 43.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 28.0, + "eval_rougeL_for_task219_rocstories_title_generation": 17.0087, + "eval_rougeL_for_task220_rocstories_title_generation": 47.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 48.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 32.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 56.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.5143, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 31.3351, + "eval_rougeL_for_task288_gigaword_title_generation": 23.0119, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 36.9222, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 52.9667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 38.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 31.7896, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 33.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.181, + "eval_rougeL_for_task418_persent_title_generation": 17.0359, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.3024, + "eval_rougeL_for_task500_scruples_title_generation": 12.258, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 29.8788, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 57.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 21.653, + "eval_rougeL_for_task602_wikitext_title_generation": 8.4904, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 23.8562, + "eval_rougeL_for_task619_ohsumed_title_generation": 32.2297, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 38.6667, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 51.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.8048, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 12.2333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.8573, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.2881, + "eval_rougeL_for_task677_ollie_data_to_text": 19.3568, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 22.967, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.8972, + "eval_rougeL_for_task769_qed_title_generation": 74.9599, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 56.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 36.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 58.4524, + "eval_rougeL_for_task892_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.9877, + "eval_rougeL_for_task970_sherliic_textual_entailment": 57.0, + "eval_rougeL_for_textual_entailment": 40.7361, + "eval_rougeL_for_title_generation": 25.8509, + "eval_rougeL_for_word_analogy": 19.4167, + "eval_runtime": 436.1855, + "eval_samples_per_second": 27.305, + "eval_steps_per_second": 0.855, + "step": 4000 + }, + { + "epoch": 1.03, + "learning_rate": 5e-05, + "loss": 1.4454, + "step": 4500 + }, + { + "epoch": 1.03, + "eval_exact_match": 25.6003, + "eval_exact_match_for_answerability_classification": 45.6923, + "eval_exact_match_for_cause_effect_classification": 36.0, + "eval_exact_match_for_coreference_resolution": 36.2143, + "eval_exact_match_for_data_to_text": 0.3632, + "eval_exact_match_for_dialogue_act_recognition": 35.4286, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 37.6, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 1.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 40.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 26.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 48.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 13.0, + "eval_exact_match_for_task1153_bard_word_analogy": 13.0, + "eval_exact_match_for_task1154_bard_word_analogy": 11.0, + "eval_exact_match_for_task1155_bard_word_analogy": 52.0, + "eval_exact_match_for_task1156_bard_word_analogy": 24.0, + "eval_exact_match_for_task1157_bard_word_analogy": 22.0, + "eval_exact_match_for_task1158_bard_word_analogy": 9.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 1.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 41.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 30.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 25.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 5.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 31.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 39.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_exact_match_for_task1659_billsum_title_generation": 0.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 20.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 29.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 29.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 13.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 49.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 39.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 18.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 43.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 49.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 51.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 27.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 0.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 35.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 84.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 18.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 53.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 63.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 36.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 22.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 60.0, + "eval_exact_match_for_textual_entailment": 38.3333, + "eval_exact_match_for_title_generation": 7.0628, + "eval_exact_match_for_word_analogy": 19.75, + "eval_f1": 42.2183, + "eval_f1_for_answerability_classification": 48.2564, + "eval_f1_for_cause_effect_classification": 54.4801, + "eval_f1_for_coreference_resolution": 44.2374, + "eval_f1_for_data_to_text": 42.7877, + "eval_f1_for_dialogue_act_recognition": 38.9286, + "eval_f1_for_grammar_error_correction": 60.513, + "eval_f1_for_keyword_tagging": 50.7202, + "eval_f1_for_overlap_extraction": 32.313, + "eval_f1_for_question_rewriting": 69.3467, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 41.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.7537, + "eval_f1_for_task035_winogrande_question_rewriting": 87.5619, + "eval_f1_for_task036_qasc_keyword_tagging": 63.6032, + "eval_f1_for_task039_qasc_overlap_extraction": 31.3333, + "eval_f1_for_task050_multirc_answerability_classification": 48.0, + "eval_f1_for_task102_commongen_data_to_text": 50.9586, + "eval_f1_for_task1152_bard_word_analogy": 13.0, + "eval_f1_for_task1153_bard_word_analogy": 16.3333, + "eval_f1_for_task1154_bard_word_analogy": 11.0, + "eval_f1_for_task1155_bard_word_analogy": 52.0, + "eval_f1_for_task1156_bard_word_analogy": 24.0, + "eval_f1_for_task1157_bard_word_analogy": 22.0, + "eval_f1_for_task1158_bard_word_analogy": 9.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.7509, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 80.0199, + "eval_f1_for_task121_zest_question_rewriting": 48.9068, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.2965, + "eval_f1_for_task1344_rte_textual_entailment": 41.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.9534, + "eval_f1_for_task1356_xlsum_title_generation": 13.2893, + "eval_f1_for_task1358_xlsum_title_generation": 30.6981, + "eval_f1_for_task1385_anli_textual_entailment": 30.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 25.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_f1_for_task1407_dart_data_to_text": 26.8457, + "eval_f1_for_task1409_dart_data_to_text": 44.2358, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.6864, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 5.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 28.1218, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.3397, + "eval_f1_for_task1562_zest_question_rewriting": 54.1559, + "eval_f1_for_task1586_scifact_title_generation": 28.4546, + "eval_f1_for_task1598_nyc_data_to_text": 39.676, + "eval_f1_for_task1612_sick_textual_entailment": 31.0, + "eval_f1_for_task1615_sick_textual_entailment": 39.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.4247, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 70.9131, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_f1_for_task1659_billsum_title_generation": 27.9334, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 63.2857, + "eval_f1_for_task1728_web_nlg_data_to_text": 46.3367, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 29.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 13.1627, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 29.0, + "eval_f1_for_task233_iirc_answerability_classification": 13.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 49.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 47.0667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 33.2926, + "eval_f1_for_task288_gigaword_title_generation": 24.6047, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 55.019, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 50.2667, + "eval_f1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.7068, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 30.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 68.7674, + "eval_f1_for_task418_persent_title_generation": 20.0731, + "eval_f1_for_task442_com_qa_question_rewriting": 69.0349, + "eval_f1_for_task500_scruples_title_generation": 12.6877, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.7141, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 19.2965, + "eval_f1_for_task602_wikitext_title_generation": 9.2644, + "eval_f1_for_task613_liar_keyword_tagging": 13.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 27.9872, + "eval_f1_for_task619_ohsumed_title_generation": 34.3535, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.6667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 35.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.331, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 20.8, + "eval_f1_for_task670_ambigqa_question_rewriting": 77.4622, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.7728, + "eval_f1_for_task677_ollie_data_to_text": 21.4568, + "eval_f1_for_task738_perspectrum_textual_entailment": 53.0, + "eval_f1_for_task743_eurlex_title_generation": 29.4514, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.0398, + "eval_f1_for_task769_qed_title_generation": 80.18, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 52.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 36.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.219, + "eval_f1_for_task892_gap_coreference_resolution": 39.0, + "eval_f1_for_task893_gap_coreference_resolution": 33.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 22.0, + "eval_f1_for_task957_e2e_data_to_text": 52.2137, + "eval_f1_for_task970_sherliic_textual_entailment": 60.0, + "eval_f1_for_textual_entailment": 38.3333, + "eval_f1_for_title_generation": 27.514, + "eval_f1_for_word_analogy": 20.1667, + "eval_gen_len": 10.0732, + "eval_global_step": 4500, + "eval_loss": 1.460721731185913, + "eval_rouge1": 43.832, + "eval_rouge1_for_answerability_classification": 48.2564, + "eval_rouge1_for_cause_effect_classification": 54.9113, + "eval_rouge1_for_coreference_resolution": 45.166, + "eval_rouge1_for_data_to_text": 46.4251, + "eval_rouge1_for_dialogue_act_recognition": 41.1476, + "eval_rouge1_for_grammar_error_correction": 64.759, + "eval_rouge1_for_keyword_tagging": 55.6951, + "eval_rouge1_for_overlap_extraction": 33.9614, + "eval_rouge1_for_question_rewriting": 70.898, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 44.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.7264, + "eval_rouge1_for_task035_winogrande_question_rewriting": 88.2209, + "eval_rouge1_for_task036_qasc_keyword_tagging": 66.1445, + "eval_rouge1_for_task039_qasc_overlap_extraction": 33.6667, + "eval_rouge1_for_task050_multirc_answerability_classification": 48.0, + "eval_rouge1_for_task102_commongen_data_to_text": 62.24, + "eval_rouge1_for_task1152_bard_word_analogy": 13.0, + "eval_rouge1_for_task1153_bard_word_analogy": 16.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 11.0, + "eval_rouge1_for_task1155_bard_word_analogy": 52.0, + "eval_rouge1_for_task1156_bard_word_analogy": 24.0, + "eval_rouge1_for_task1157_bard_word_analogy": 22.0, + "eval_rouge1_for_task1158_bard_word_analogy": 9.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 31.1627, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.2776, + "eval_rouge1_for_task121_zest_question_rewriting": 51.0115, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.5567, + "eval_rouge1_for_task1344_rte_textual_entailment": 41.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.5499, + "eval_rouge1_for_task1356_xlsum_title_generation": 15.3858, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.3656, + "eval_rouge1_for_task1385_anli_textual_entailment": 30.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 25.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 7.2, + "eval_rouge1_for_task1407_dart_data_to_text": 28.911, + "eval_rouge1_for_task1409_dart_data_to_text": 44.5886, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.8857, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 5.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 31.0202, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6323, + "eval_rouge1_for_task1562_zest_question_rewriting": 57.1043, + "eval_rouge1_for_task1586_scifact_title_generation": 31.6313, + "eval_rouge1_for_task1598_nyc_data_to_text": 41.9643, + "eval_rouge1_for_task1612_sick_textual_entailment": 31.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 79.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.6974, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 71.8235, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rouge1_for_task1659_billsum_title_generation": 29.8438, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 63.2857, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 56.7195, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 29.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 16.5332, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 29.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 13.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 49.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 47.7333, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.2562, + "eval_rouge1_for_task288_gigaword_title_generation": 27.4253, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 56.6857, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 50.8667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 35.038, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 38.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.9299, + "eval_rouge1_for_task418_persent_title_generation": 22.6725, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.7186, + "eval_rouge1_for_task500_scruples_title_generation": 14.5041, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 32.8408, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 22.7363, + "eval_rouge1_for_task602_wikitext_title_generation": 9.682, + "eval_rouge1_for_task613_liar_keyword_tagging": 29.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 30.6742, + "eval_rouge1_for_task619_ohsumed_title_generation": 36.8528, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 38.8333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 35.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 93.831, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 20.8, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 78.2008, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 66.4409, + "eval_rouge1_for_task677_ollie_data_to_text": 23.4834, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rouge1_for_task743_eurlex_title_generation": 31.1886, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.312, + "eval_rouge1_for_task769_qed_title_generation": 80.2012, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 36.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.4524, + "eval_rouge1_for_task892_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 22.0, + "eval_rouge1_for_task957_e2e_data_to_text": 52.8795, + "eval_rouge1_for_task970_sherliic_textual_entailment": 60.0, + "eval_rouge1_for_textual_entailment": 40.0278, + "eval_rouge1_for_title_generation": 29.5434, + "eval_rouge1_for_word_analogy": 20.1667, + "eval_rougeL": 42.5176, + "eval_rougeL_for_answerability_classification": 48.2564, + "eval_rougeL_for_cause_effect_classification": 54.1167, + "eval_rougeL_for_coreference_resolution": 45.166, + "eval_rougeL_for_data_to_text": 39.3515, + "eval_rougeL_for_dialogue_act_recognition": 41.1476, + "eval_rougeL_for_grammar_error_correction": 63.4894, + "eval_rougeL_for_keyword_tagging": 55.0962, + "eval_rougeL_for_overlap_extraction": 32.613, + "eval_rougeL_for_question_rewriting": 67.2925, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 44.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.7264, + "eval_rougeL_for_task035_winogrande_question_rewriting": 88.1071, + "eval_rougeL_for_task036_qasc_keyword_tagging": 64.4668, + "eval_rougeL_for_task039_qasc_overlap_extraction": 33.6667, + "eval_rougeL_for_task050_multirc_answerability_classification": 48.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.6097, + "eval_rougeL_for_task1152_bard_word_analogy": 13.0, + "eval_rougeL_for_task1153_bard_word_analogy": 16.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 11.0, + "eval_rougeL_for_task1155_bard_word_analogy": 52.0, + "eval_rougeL_for_task1156_bard_word_analogy": 24.0, + "eval_rougeL_for_task1157_bard_word_analogy": 22.0, + "eval_rougeL_for_task1158_bard_word_analogy": 9.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 26.0189, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.1203, + "eval_rougeL_for_task121_zest_question_rewriting": 43.5609, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.0291, + "eval_rougeL_for_task1344_rte_textual_entailment": 41.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.6773, + "eval_rougeL_for_task1356_xlsum_title_generation": 12.606, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.4349, + "eval_rougeL_for_task1385_anli_textual_entailment": 30.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 25.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 52.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 7.2, + "eval_rougeL_for_task1407_dart_data_to_text": 25.3146, + "eval_rougeL_for_task1409_dart_data_to_text": 38.1982, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.2627, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 5.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 29.3452, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7161, + "eval_rougeL_for_task1562_zest_question_rewriting": 48.0262, + "eval_rougeL_for_task1586_scifact_title_generation": 26.1028, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.7825, + "eval_rougeL_for_task1612_sick_textual_entailment": 31.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 79.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.4006, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 62.8845, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 52.0, + "eval_rougeL_for_task1659_billsum_title_generation": 25.3697, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 63.2857, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.7684, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 29.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 16.5332, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 29.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 13.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 49.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 47.7333, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 31.5594, + "eval_rougeL_for_task288_gigaword_title_generation": 23.276, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 56.6857, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 50.8667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.3347, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 38.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 59.8221, + "eval_rougeL_for_task418_persent_title_generation": 19.3178, + "eval_rougeL_for_task442_com_qa_question_rewriting": 68.5155, + "eval_rougeL_for_task500_scruples_title_generation": 13.6858, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.0782, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 22.1141, + "eval_rougeL_for_task602_wikitext_title_generation": 9.5245, + "eval_rougeL_for_task613_liar_keyword_tagging": 29.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 26.8152, + "eval_rougeL_for_task619_ohsumed_title_generation": 32.5709, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 37.5167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 35.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 93.831, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 20.8, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 76.9724, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 65.2889, + "eval_rougeL_for_task677_ollie_data_to_text": 19.2775, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 53.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.7036, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0221, + "eval_rougeL_for_task769_qed_title_generation": 80.2012, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 36.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.4524, + "eval_rougeL_for_task892_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 48.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 22.0, + "eval_rougeL_for_task957_e2e_data_to_text": 42.4222, + "eval_rougeL_for_task970_sherliic_textual_entailment": 60.0, + "eval_rougeL_for_textual_entailment": 40.0278, + "eval_rougeL_for_title_generation": 27.0397, + "eval_rougeL_for_word_analogy": 20.1667, + "eval_runtime": 377.5119, + "eval_samples_per_second": 31.549, + "eval_steps_per_second": 0.988, + "step": 4500 + }, + { + "epoch": 1.14, + "learning_rate": 5e-05, + "loss": 1.3613, + "step": 5000 + }, + { + "epoch": 1.14, + "eval_exact_match": 25.9278, + "eval_exact_match_for_answerability_classification": 49.0, + "eval_exact_match_for_cause_effect_classification": 35.8571, + "eval_exact_match_for_coreference_resolution": 35.5714, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 36.2857, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 34.6, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.0, + "eval_exact_match_for_task020_mctaco_answerability_classification": 50.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 35.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 16.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 13.0, + "eval_exact_match_for_task1153_bard_word_analogy": 13.0, + "eval_exact_match_for_task1154_bard_word_analogy": 13.0, + "eval_exact_match_for_task1155_bard_word_analogy": 51.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 26.0, + "eval_exact_match_for_task1158_bard_word_analogy": 11.0, + "eval_exact_match_for_task1159_bard_word_analogy": 15.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 22.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 1.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 50.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 16.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 20.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 35.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 43.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 45.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 24.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 12.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 58.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 62.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 39.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 39.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 37.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.9167, + "eval_exact_match_for_title_generation": 7.0628, + "eval_exact_match_for_word_analogy": 20.625, + "eval_f1": 42.7715, + "eval_f1_for_answerability_classification": 51.5641, + "eval_f1_for_cause_effect_classification": 53.6473, + "eval_f1_for_coreference_resolution": 44.2632, + "eval_f1_for_data_to_text": 41.3809, + "eval_f1_for_dialogue_act_recognition": 40.2143, + "eval_f1_for_grammar_error_correction": 62.4875, + "eval_f1_for_keyword_tagging": 49.5351, + "eval_f1_for_overlap_extraction": 30.7291, + "eval_f1_for_question_rewriting": 69.9215, + "eval_f1_for_task020_mctaco_answerability_classification": 50.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 36.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.9182, + "eval_f1_for_task035_winogrande_question_rewriting": 87.0029, + "eval_f1_for_task036_qasc_keyword_tagging": 58.661, + "eval_f1_for_task039_qasc_overlap_extraction": 29.8889, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 50.9567, + "eval_f1_for_task1152_bard_word_analogy": 13.0, + "eval_f1_for_task1153_bard_word_analogy": 15.6667, + "eval_f1_for_task1154_bard_word_analogy": 13.0, + "eval_f1_for_task1155_bard_word_analogy": 51.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 26.0, + "eval_f1_for_task1158_bard_word_analogy": 11.0, + "eval_f1_for_task1159_bard_word_analogy": 15.0, + "eval_f1_for_task1161_coda_19_title_generation": 28.711, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.5505, + "eval_f1_for_task121_zest_question_rewriting": 47.6993, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.1976, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.8459, + "eval_f1_for_task1356_xlsum_title_generation": 13.7662, + "eval_f1_for_task1358_xlsum_title_generation": 30.5929, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 32.0, + "eval_f1_for_task1387_anli_textual_entailment": 34.0, + "eval_f1_for_task1388_cb_textual_entailment": 22.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 5.0, + "eval_f1_for_task1407_dart_data_to_text": 24.3949, + "eval_f1_for_task1409_dart_data_to_text": 44.1721, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.3174, + "eval_f1_for_task1439_doqa_answerability_classification": 50.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 16.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 27.4546, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.6576, + "eval_f1_for_task1562_zest_question_rewriting": 56.2119, + "eval_f1_for_task1586_scifact_title_generation": 27.4382, + "eval_f1_for_task1598_nyc_data_to_text": 38.6195, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.2794, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 72.2012, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_f1_for_task1659_billsum_title_generation": 31.4779, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 66.5857, + "eval_f1_for_task1728_web_nlg_data_to_text": 41.6209, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 31.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 15.4729, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 35.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 54.0071, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 31.5693, + "eval_f1_for_task288_gigaword_title_generation": 23.9606, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 57.3199, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 47.2524, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 72.5, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.6875, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 30.5, + "eval_f1_for_task402_grailqa_question_rewriting": 68.0222, + "eval_f1_for_task418_persent_title_generation": 18.6354, + "eval_f1_for_task442_com_qa_question_rewriting": 69.9589, + "eval_f1_for_task500_scruples_title_generation": 11.4408, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 33.5948, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 21.848, + "eval_f1_for_task602_wikitext_title_generation": 9.7003, + "eval_f1_for_task613_liar_keyword_tagging": 13.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.1769, + "eval_f1_for_task619_ohsumed_title_generation": 35.8719, + "eval_f1_for_task620_ohsumed_keyword_tagging": 35.2333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.1143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 20.1333, + "eval_f1_for_task670_ambigqa_question_rewriting": 80.8338, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.8134, + "eval_f1_for_task677_ollie_data_to_text": 17.2612, + "eval_f1_for_task738_perspectrum_textual_entailment": 58.0, + "eval_f1_for_task743_eurlex_title_generation": 29.972, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.5925, + "eval_f1_for_task769_qed_title_generation": 81.7417, + "eval_f1_for_task827_copa_cause_effect_classification": 48.0, + "eval_f1_for_task828_copa_cause_effect_classification": 53.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 39.0, + "eval_f1_for_task891_gap_coreference_resolution": 52.0524, + "eval_f1_for_task892_gap_coreference_resolution": 39.0, + "eval_f1_for_task893_gap_coreference_resolution": 33.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 37.0, + "eval_f1_for_task957_e2e_data_to_text": 51.6461, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.9167, + "eval_f1_for_title_generation": 28.0451, + "eval_f1_for_word_analogy": 20.9583, + "eval_gen_len": 10.3657, + "eval_global_step": 5000, + "eval_loss": 1.4977169036865234, + "eval_rouge1": 44.4591, + "eval_rouge1_for_answerability_classification": 51.5641, + "eval_rouge1_for_cause_effect_classification": 53.9746, + "eval_rouge1_for_coreference_resolution": 44.9417, + "eval_rouge1_for_data_to_text": 46.0844, + "eval_rouge1_for_dialogue_act_recognition": 42.4333, + "eval_rouge1_for_grammar_error_correction": 67.4563, + "eval_rouge1_for_keyword_tagging": 54.5471, + "eval_rouge1_for_overlap_extraction": 32.1844, + "eval_rouge1_for_question_rewriting": 71.3724, + "eval_rouge1_for_task020_mctaco_answerability_classification": 50.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 39.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.8931, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.6616, + "eval_rouge1_for_task036_qasc_keyword_tagging": 60.7876, + "eval_rouge1_for_task039_qasc_overlap_extraction": 32.2222, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.0584, + "eval_rouge1_for_task1152_bard_word_analogy": 13.0, + "eval_rouge1_for_task1153_bard_word_analogy": 15.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 13.0, + "eval_rouge1_for_task1155_bard_word_analogy": 51.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 26.0, + "eval_rouge1_for_task1158_bard_word_analogy": 11.0, + "eval_rouge1_for_task1159_bard_word_analogy": 15.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 32.3384, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.8082, + "eval_rouge1_for_task121_zest_question_rewriting": 49.6231, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.4544, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.4425, + "eval_rouge1_for_task1356_xlsum_title_generation": 15.9499, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.3019, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 22.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 7.2, + "eval_rouge1_for_task1407_dart_data_to_text": 28.4796, + "eval_rouge1_for_task1409_dart_data_to_text": 44.7357, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 47.9726, + "eval_rouge1_for_task1439_doqa_answerability_classification": 50.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 16.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 30.2873, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.94, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.9111, + "eval_rouge1_for_task1586_scifact_title_generation": 31.1951, + "eval_rouge1_for_task1598_nyc_data_to_text": 41.0495, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.5575, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 73.2941, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1659_billsum_title_generation": 32.9595, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 66.5857, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.0588, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.7761, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 35.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 54.6738, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 32.1465, + "eval_rouge1_for_task288_gigaword_title_generation": 26.7488, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 58.7048, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 47.6333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 72.5, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.2881, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 34.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.0044, + "eval_rouge1_for_task418_persent_title_generation": 21.2835, + "eval_rouge1_for_task442_com_qa_question_rewriting": 73.2436, + "eval_rouge1_for_task500_scruples_title_generation": 13.8482, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.5536, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 24.5647, + "eval_rouge1_for_task602_wikitext_title_generation": 10.0498, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 25.8675, + "eval_rouge1_for_task619_ohsumed_title_generation": 38.396, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.0, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.6143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 20.1333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 81.5844, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.3665, + "eval_rouge1_for_task677_ollie_data_to_text": 19.0231, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 58.0, + "eval_rouge1_for_task743_eurlex_title_generation": 31.7825, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.798, + "eval_rouge1_for_task769_qed_title_generation": 81.7246, + "eval_rouge1_for_task827_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 39.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 52.2857, + "eval_rouge1_for_task892_gap_coreference_resolution": 39.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 37.0, + "eval_rouge1_for_task957_e2e_data_to_text": 52.9705, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 40.7778, + "eval_rouge1_for_title_generation": 30.0788, + "eval_rouge1_for_word_analogy": 20.9583, + "eval_rougeL": 43.169, + "eval_rougeL_for_answerability_classification": 51.5641, + "eval_rougeL_for_cause_effect_classification": 53.2635, + "eval_rougeL_for_coreference_resolution": 44.9417, + "eval_rougeL_for_data_to_text": 39.2989, + "eval_rougeL_for_dialogue_act_recognition": 42.4333, + "eval_rougeL_for_grammar_error_correction": 65.9294, + "eval_rougeL_for_keyword_tagging": 53.5998, + "eval_rougeL_for_overlap_extraction": 31.1852, + "eval_rougeL_for_question_rewriting": 67.9492, + "eval_rougeL_for_task020_mctaco_answerability_classification": 50.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 39.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.8931, + "eval_rougeL_for_task035_winogrande_question_rewriting": 87.4064, + "eval_rougeL_for_task036_qasc_keyword_tagging": 57.3683, + "eval_rougeL_for_task039_qasc_overlap_extraction": 32.2222, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 55.3913, + "eval_rougeL_for_task1152_bard_word_analogy": 13.0, + "eval_rougeL_for_task1153_bard_word_analogy": 15.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 13.0, + "eval_rougeL_for_task1155_bard_word_analogy": 51.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 26.0, + "eval_rougeL_for_task1158_bard_word_analogy": 11.0, + "eval_rougeL_for_task1159_bard_word_analogy": 15.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 26.42, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.6509, + "eval_rougeL_for_task121_zest_question_rewriting": 43.662, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.9747, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.5162, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.3899, + "eval_rougeL_for_task1358_xlsum_title_generation": 28.9464, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 22.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 7.2, + "eval_rougeL_for_task1407_dart_data_to_text": 25.1571, + "eval_rougeL_for_task1409_dart_data_to_text": 38.2412, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 45.8238, + "eval_rougeL_for_task1439_doqa_answerability_classification": 50.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 16.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 25.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 28.5799, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.0351, + "eval_rougeL_for_task1562_zest_question_rewriting": 50.2445, + "eval_rougeL_for_task1586_scifact_title_generation": 26.1383, + "eval_rougeL_for_task1598_nyc_data_to_text": 33.0738, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.2606, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 64.5563, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.4418, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 66.5857, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.4341, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 18.4206, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 35.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 54.6738, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 30.1482, + "eval_rougeL_for_task288_gigaword_title_generation": 22.4498, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 58.7048, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 47.6333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 72.5, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.5708, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 34.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 57.8459, + "eval_rougeL_for_task418_persent_title_generation": 17.944, + "eval_rougeL_for_task442_com_qa_question_rewriting": 69.2906, + "eval_rougeL_for_task500_scruples_title_generation": 12.7315, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.9734, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 23.6344, + "eval_rougeL_for_task602_wikitext_title_generation": 9.8423, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.6068, + "eval_rougeL_for_task619_ohsumed_title_generation": 34.9173, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.6833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.6143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 20.1333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 80.9033, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.7672, + "eval_rougeL_for_task677_ollie_data_to_text": 16.1771, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 58.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.4713, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.3102, + "eval_rougeL_for_task769_qed_title_generation": 81.7246, + "eval_rougeL_for_task827_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 59.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 39.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 52.2857, + "eval_rougeL_for_task892_gap_coreference_resolution": 39.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 38.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 37.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.7178, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 40.7778, + "eval_rougeL_for_title_generation": 27.5463, + "eval_rougeL_for_word_analogy": 20.9583, + "eval_runtime": 439.5276, + "eval_samples_per_second": 27.097, + "eval_steps_per_second": 0.849, + "step": 5000 + }, + { + "epoch": 1.26, + "learning_rate": 5e-05, + "loss": 1.3768, + "step": 5500 + }, + { + "epoch": 1.26, + "eval_exact_match": 25.8858, + "eval_exact_match_for_answerability_classification": 50.3846, + "eval_exact_match_for_cause_effect_classification": 36.1429, + "eval_exact_match_for_coreference_resolution": 34.7857, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 35.0, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 38.8, + "eval_exact_match_for_overlap_extraction": 11.0, + "eval_exact_match_for_question_rewriting": 1.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 54.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 31.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 28.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 22.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 13.0, + "eval_exact_match_for_task1153_bard_word_analogy": 11.0, + "eval_exact_match_for_task1154_bard_word_analogy": 15.0, + "eval_exact_match_for_task1155_bard_word_analogy": 53.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 26.0, + "eval_exact_match_for_task1158_bard_word_analogy": 15.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 1.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 49.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 36.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 41.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 60.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 4.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 1.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 11.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 32.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 47.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 38.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 48.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 44.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 23.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 26.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 6.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 49.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 10.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 58.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 37.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 19.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 16.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 54.0, + "eval_exact_match_for_textual_entailment": 37.5417, + "eval_exact_match_for_title_generation": 7.1188, + "eval_exact_match_for_word_analogy": 21.25, + "eval_f1": 42.5847, + "eval_f1_for_answerability_classification": 52.9487, + "eval_f1_for_cause_effect_classification": 54.1429, + "eval_f1_for_coreference_resolution": 43.8272, + "eval_f1_for_data_to_text": 40.5923, + "eval_f1_for_dialogue_act_recognition": 38.5714, + "eval_f1_for_grammar_error_correction": 60.6511, + "eval_f1_for_keyword_tagging": 51.3862, + "eval_f1_for_overlap_extraction": 28.8717, + "eval_f1_for_question_rewriting": 70.2527, + "eval_f1_for_task020_mctaco_answerability_classification": 54.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 34.1667, + "eval_f1_for_task034_winogrande_question_rewriting": 92.9182, + "eval_f1_for_task035_winogrande_question_rewriting": 86.4697, + "eval_f1_for_task036_qasc_keyword_tagging": 64.1169, + "eval_f1_for_task039_qasc_overlap_extraction": 27.3889, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 51.1129, + "eval_f1_for_task1152_bard_word_analogy": 13.0, + "eval_f1_for_task1153_bard_word_analogy": 14.3333, + "eval_f1_for_task1154_bard_word_analogy": 15.0, + "eval_f1_for_task1155_bard_word_analogy": 53.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 26.0, + "eval_f1_for_task1158_bard_word_analogy": 15.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 29.0967, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.6705, + "eval_f1_for_task121_zest_question_rewriting": 50.211, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.6555, + "eval_f1_for_task1344_rte_textual_entailment": 49.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.059, + "eval_f1_for_task1356_xlsum_title_generation": 14.9228, + "eval_f1_for_task1358_xlsum_title_generation": 30.5868, + "eval_f1_for_task1385_anli_textual_entailment": 32.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 36.0, + "eval_f1_for_task1388_cb_textual_entailment": 41.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_f1_for_task1407_dart_data_to_text": 25.7274, + "eval_f1_for_task1409_dart_data_to_text": 44.6197, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.0103, + "eval_f1_for_task1439_doqa_answerability_classification": 60.0, + "eval_f1_for_task1442_doqa_answerability_classification": 53.0, + "eval_f1_for_task1516_imppres_textual_entailment": 4.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 26.8819, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2919, + "eval_f1_for_task1562_zest_question_rewriting": 56.4277, + "eval_f1_for_task1586_scifact_title_generation": 28.7758, + "eval_f1_for_task1598_nyc_data_to_text": 37.1318, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.2442, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_f1_for_task1631_open_pi_data_to_text": 67.9506, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_f1_for_task1659_billsum_title_generation": 29.454, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 60.419, + "eval_f1_for_task1728_web_nlg_data_to_text": 42.3712, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 30.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 32.0, + "eval_f1_for_task219_rocstories_title_generation": 16.2024, + "eval_f1_for_task220_rocstories_title_generation": 47.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 38.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 53.6571, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 30.3546, + "eval_f1_for_task288_gigaword_title_generation": 24.3524, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 60.2333, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 48.0857, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.4327, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.6667, + "eval_f1_for_task402_grailqa_question_rewriting": 71.1039, + "eval_f1_for_task418_persent_title_generation": 19.5937, + "eval_f1_for_task442_com_qa_question_rewriting": 67.6206, + "eval_f1_for_task500_scruples_title_generation": 12.2715, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.9403, + "eval_f1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 23.4808, + "eval_f1_for_task602_wikitext_title_generation": 12.2924, + "eval_f1_for_task613_liar_keyword_tagging": 15.0, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.9012, + "eval_f1_for_task619_ohsumed_title_generation": 34.5755, + "eval_f1_for_task620_ohsumed_keyword_tagging": 34.4, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.4143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 15.6333, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.558, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.4965, + "eval_f1_for_task677_ollie_data_to_text": 15.3719, + "eval_f1_for_task738_perspectrum_textual_entailment": 58.0, + "eval_f1_for_task743_eurlex_title_generation": 28.3418, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.042, + "eval_f1_for_task769_qed_title_generation": 83.0847, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 53.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 37.0, + "eval_f1_for_task891_gap_coreference_resolution": 50.719, + "eval_f1_for_task892_gap_coreference_resolution": 44.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 19.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 16.0, + "eval_f1_for_task957_e2e_data_to_text": 50.2157, + "eval_f1_for_task970_sherliic_textual_entailment": 54.0, + "eval_f1_for_textual_entailment": 37.5417, + "eval_f1_for_title_generation": 28.1694, + "eval_f1_for_word_analogy": 21.6667, + "eval_gen_len": 10.259, + "eval_global_step": 5500, + "eval_loss": 1.4333692789077759, + "eval_rouge1": 44.2423, + "eval_rouge1_for_answerability_classification": 52.9487, + "eval_rouge1_for_cause_effect_classification": 54.4124, + "eval_rouge1_for_coreference_resolution": 44.5044, + "eval_rouge1_for_data_to_text": 45.0244, + "eval_rouge1_for_dialogue_act_recognition": 40.6905, + "eval_rouge1_for_grammar_error_correction": 65.1902, + "eval_rouge1_for_keyword_tagging": 56.1905, + "eval_rouge1_for_overlap_extraction": 30.4998, + "eval_rouge1_for_question_rewriting": 71.7235, + "eval_rouge1_for_task020_mctaco_answerability_classification": 54.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 35.6667, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.8931, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.2201, + "eval_rouge1_for_task036_qasc_keyword_tagging": 66.6049, + "eval_rouge1_for_task039_qasc_overlap_extraction": 29.7222, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.5909, + "eval_rouge1_for_task1152_bard_word_analogy": 13.0, + "eval_rouge1_for_task1153_bard_word_analogy": 14.3333, + "eval_rouge1_for_task1154_bard_word_analogy": 15.0, + "eval_rouge1_for_task1155_bard_word_analogy": 53.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 26.0, + "eval_rouge1_for_task1158_bard_word_analogy": 15.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 32.6375, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.9282, + "eval_rouge1_for_task121_zest_question_rewriting": 51.7118, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.8685, + "eval_rouge1_for_task1344_rte_textual_entailment": 49.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.5617, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.3391, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.5593, + "eval_rouge1_for_task1385_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 36.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 41.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 7.5, + "eval_rouge1_for_task1407_dart_data_to_text": 29.2584, + "eval_rouge1_for_task1409_dart_data_to_text": 44.7768, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 43.7943, + "eval_rouge1_for_task1439_doqa_answerability_classification": 60.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 4.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 30.0624, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.586, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.2519, + "eval_rouge1_for_task1586_scifact_title_generation": 31.9932, + "eval_rouge1_for_task1598_nyc_data_to_text": 39.6047, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.517, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 68.5878, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_rouge1_for_task1659_billsum_title_generation": 31.4383, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 60.419, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 56.9313, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 32.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.1961, + "eval_rouge1_for_task220_rocstories_title_generation": 47.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 38.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 53.8238, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 31.2774, + "eval_rouge1_for_task288_gigaword_title_generation": 27.1267, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 61.6333, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 48.0333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.9539, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 38.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 73.2638, + "eval_rouge1_for_task418_persent_title_generation": 22.5505, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.3277, + "eval_rouge1_for_task500_scruples_title_generation": 13.9033, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.2554, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 26.166, + "eval_rouge1_for_task602_wikitext_title_generation": 13.566, + "eval_rouge1_for_task613_liar_keyword_tagging": 32.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 26.2664, + "eval_rouge1_for_task619_ohsumed_title_generation": 36.7836, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 38.1, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.9143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 15.5333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.2214, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 69.0613, + "eval_rouge1_for_task677_ollie_data_to_text": 16.9469, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 58.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.5464, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.3981, + "eval_rouge1_for_task769_qed_title_generation": 83.1251, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 37.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 50.9524, + "eval_rouge1_for_task892_gap_coreference_resolution": 44.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 19.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 16.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.3209, + "eval_rouge1_for_task970_sherliic_textual_entailment": 54.0, + "eval_rouge1_for_textual_entailment": 39.4028, + "eval_rouge1_for_title_generation": 30.2661, + "eval_rouge1_for_word_analogy": 21.6667, + "eval_rougeL": 42.9625, + "eval_rougeL_for_answerability_classification": 52.9487, + "eval_rougeL_for_cause_effect_classification": 53.8085, + "eval_rougeL_for_coreference_resolution": 44.5044, + "eval_rougeL_for_data_to_text": 37.7114, + "eval_rougeL_for_dialogue_act_recognition": 40.6905, + "eval_rougeL_for_grammar_error_correction": 63.8409, + "eval_rougeL_for_keyword_tagging": 55.5417, + "eval_rougeL_for_overlap_extraction": 29.5775, + "eval_rougeL_for_question_rewriting": 68.4226, + "eval_rougeL_for_task020_mctaco_answerability_classification": 54.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 35.6667, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.8931, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.6743, + "eval_rougeL_for_task036_qasc_keyword_tagging": 64.7442, + "eval_rougeL_for_task039_qasc_overlap_extraction": 29.7222, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 53.9003, + "eval_rougeL_for_task1152_bard_word_analogy": 13.0, + "eval_rougeL_for_task1153_bard_word_analogy": 14.3333, + "eval_rougeL_for_task1154_bard_word_analogy": 15.0, + "eval_rougeL_for_task1155_bard_word_analogy": 53.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 26.0, + "eval_rougeL_for_task1158_bard_word_analogy": 15.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 27.0721, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.7709, + "eval_rougeL_for_task121_zest_question_rewriting": 45.8105, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.1231, + "eval_rougeL_for_task1344_rte_textual_entailment": 49.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.6999, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.5617, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.4909, + "eval_rougeL_for_task1385_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 36.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 41.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 47.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 7.5, + "eval_rougeL_for_task1407_dart_data_to_text": 26.2165, + "eval_rougeL_for_task1409_dart_data_to_text": 38.0765, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.0144, + "eval_rougeL_for_task1439_doqa_answerability_classification": 60.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 4.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 28.7418, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6673, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.5544, + "eval_rougeL_for_task1586_scifact_title_generation": 26.9242, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.5916, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.2202, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 56.537, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 45.0, + "eval_rougeL_for_task1659_billsum_title_generation": 26.6375, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 60.419, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.3235, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 32.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.0627, + "eval_rougeL_for_task220_rocstories_title_generation": 47.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 52.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 38.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 48.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 53.8238, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 29.4329, + "eval_rougeL_for_task288_gigaword_title_generation": 23.1429, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 61.6333, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 48.0333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.6998, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 38.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.733, + "eval_rougeL_for_task418_persent_title_generation": 19.3193, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.353, + "eval_rougeL_for_task500_scruples_title_generation": 12.685, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.4837, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 49.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 25.0375, + "eval_rougeL_for_task602_wikitext_title_generation": 13.3586, + "eval_rougeL_for_task613_liar_keyword_tagging": 32.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 23.2933, + "eval_rougeL_for_task619_ohsumed_title_generation": 33.9889, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 36.7167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.9143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 15.5333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.6319, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 68.3072, + "eval_rougeL_for_task677_ollie_data_to_text": 14.6, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 58.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.6338, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.1127, + "eval_rougeL_for_task769_qed_title_generation": 83.1251, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 38.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 37.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 50.9524, + "eval_rougeL_for_task892_gap_coreference_resolution": 44.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 19.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 16.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.4419, + "eval_rougeL_for_task970_sherliic_textual_entailment": 54.0, + "eval_rougeL_for_textual_entailment": 39.4028, + "eval_rougeL_for_title_generation": 27.8168, + "eval_rougeL_for_word_analogy": 21.6667, + "eval_runtime": 389.2104, + "eval_samples_per_second": 30.6, + "eval_steps_per_second": 0.958, + "step": 5500 + }, + { + "epoch": 1.37, + "learning_rate": 5e-05, + "loss": 1.4033, + "step": 6000 + }, + { + "epoch": 1.37, + "eval_exact_match": 26.2972, + "eval_exact_match_for_answerability_classification": 50.2308, + "eval_exact_match_for_cause_effect_classification": 36.5714, + "eval_exact_match_for_coreference_resolution": 33.0714, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 35.1429, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 38.2, + "eval_exact_match_for_overlap_extraction": 9.0, + "eval_exact_match_for_question_rewriting": 1.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 47.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 34.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 18.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 41.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 12.0, + "eval_exact_match_for_task1153_bard_word_analogy": 10.0, + "eval_exact_match_for_task1154_bard_word_analogy": 14.0, + "eval_exact_match_for_task1155_bard_word_analogy": 58.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 28.0, + "eval_exact_match_for_task1158_bard_word_analogy": 8.0, + "eval_exact_match_for_task1159_bard_word_analogy": 12.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 26.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 33.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 9.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 29.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 51.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_exact_match_for_task1659_billsum_title_generation": 1.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 13.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 4.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 47.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 47.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 51.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 21.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 39.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 3.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 63.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 1.1905, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 15.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 15.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 57.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 61.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 51.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 55.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 43.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 40.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 13.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 28.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 51.0, + "eval_exact_match_for_textual_entailment": 41.125, + "eval_exact_match_for_title_generation": 7.0067, + "eval_exact_match_for_word_analogy": 20.625, + "eval_f1": 43.2387, + "eval_f1_for_answerability_classification": 52.7949, + "eval_f1_for_cause_effect_classification": 54.5761, + "eval_f1_for_coreference_resolution": 42.4007, + "eval_f1_for_data_to_text": 41.7248, + "eval_f1_for_dialogue_act_recognition": 38.7143, + "eval_f1_for_grammar_error_correction": 61.8585, + "eval_f1_for_keyword_tagging": 52.0646, + "eval_f1_for_overlap_extraction": 31.6826, + "eval_f1_for_question_rewriting": 69.676, + "eval_f1_for_task020_mctaco_answerability_classification": 47.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 36.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.9466, + "eval_f1_for_task035_winogrande_question_rewriting": 85.2238, + "eval_f1_for_task036_qasc_keyword_tagging": 63.0803, + "eval_f1_for_task039_qasc_overlap_extraction": 24.7222, + "eval_f1_for_task050_multirc_answerability_classification": 41.0, + "eval_f1_for_task102_commongen_data_to_text": 53.4291, + "eval_f1_for_task1152_bard_word_analogy": 12.0, + "eval_f1_for_task1153_bard_word_analogy": 12.6667, + "eval_f1_for_task1154_bard_word_analogy": 14.0, + "eval_f1_for_task1155_bard_word_analogy": 58.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 28.0, + "eval_f1_for_task1158_bard_word_analogy": 8.0, + "eval_f1_for_task1159_bard_word_analogy": 12.0, + "eval_f1_for_task1161_coda_19_title_generation": 29.5315, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.7034, + "eval_f1_for_task121_zest_question_rewriting": 48.9654, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 11.092, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.5484, + "eval_f1_for_task1356_xlsum_title_generation": 15.0231, + "eval_f1_for_task1358_xlsum_title_generation": 30.5012, + "eval_f1_for_task1385_anli_textual_entailment": 26.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 37.0, + "eval_f1_for_task1388_cb_textual_entailment": 33.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_f1_for_task1407_dart_data_to_text": 23.2398, + "eval_f1_for_task1409_dart_data_to_text": 44.8001, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 39.9324, + "eval_f1_for_task1439_doqa_answerability_classification": 54.0, + "eval_f1_for_task1442_doqa_answerability_classification": 57.0, + "eval_f1_for_task1516_imppres_textual_entailment": 9.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 29.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 29.1265, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.7845, + "eval_f1_for_task1562_zest_question_rewriting": 56.0162, + "eval_f1_for_task1586_scifact_title_generation": 28.8364, + "eval_f1_for_task1598_nyc_data_to_text": 37.6103, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 51.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.3354, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_f1_for_task1631_open_pi_data_to_text": 66.9732, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_f1_for_task1659_billsum_title_generation": 30.7447, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.5143, + "eval_f1_for_task1728_web_nlg_data_to_text": 45.6241, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 17.4117, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 47.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 64.8905, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 38.643, + "eval_f1_for_task288_gigaword_title_generation": 24.0551, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 56.9667, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 47.0857, + "eval_f1_for_task349_squad2.0_answerability_classification": 39.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.6383, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 72.703, + "eval_f1_for_task418_persent_title_generation": 22.7142, + "eval_f1_for_task442_com_qa_question_rewriting": 67.8645, + "eval_f1_for_task500_scruples_title_generation": 12.4664, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 35.8341, + "eval_f1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 18.8434, + "eval_f1_for_task602_wikitext_title_generation": 10.0742, + "eval_f1_for_task613_liar_keyword_tagging": 16.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 25.7275, + "eval_f1_for_task619_ohsumed_title_generation": 32.2138, + "eval_f1_for_task620_ohsumed_keyword_tagging": 36.0333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.8762, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 20.9333, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.3429, + "eval_f1_for_task671_ambigqa_question_rewriting": 66.7866, + "eval_f1_for_task677_ollie_data_to_text": 20.8017, + "eval_f1_for_task738_perspectrum_textual_entailment": 57.0, + "eval_f1_for_task743_eurlex_title_generation": 28.8311, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.251, + "eval_f1_for_task769_qed_title_generation": 77.9133, + "eval_f1_for_task827_copa_cause_effect_classification": 51.0, + "eval_f1_for_task828_copa_cause_effect_classification": 55.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 43.0, + "eval_f1_for_task891_gap_coreference_resolution": 48.0524, + "eval_f1_for_task892_gap_coreference_resolution": 13.0, + "eval_f1_for_task893_gap_coreference_resolution": 28.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 51.3232, + "eval_f1_for_task970_sherliic_textual_entailment": 51.0, + "eval_f1_for_textual_entailment": 41.125, + "eval_f1_for_title_generation": 28.2287, + "eval_f1_for_word_analogy": 20.9583, + "eval_gen_len": 10.7323, + "eval_global_step": 6000, + "eval_loss": 1.4575122594833374, + "eval_rouge1": 44.7569, + "eval_rouge1_for_answerability_classification": 52.7949, + "eval_rouge1_for_cause_effect_classification": 54.8325, + "eval_rouge1_for_coreference_resolution": 43.0493, + "eval_rouge1_for_data_to_text": 45.7784, + "eval_rouge1_for_dialogue_act_recognition": 40.9619, + "eval_rouge1_for_grammar_error_correction": 66.1279, + "eval_rouge1_for_keyword_tagging": 56.7723, + "eval_rouge1_for_overlap_extraction": 33.1906, + "eval_rouge1_for_question_rewriting": 71.2152, + "eval_rouge1_for_task020_mctaco_answerability_classification": 47.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 38.8333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.9215, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.1473, + "eval_rouge1_for_task036_qasc_keyword_tagging": 66.2328, + "eval_rouge1_for_task039_qasc_overlap_extraction": 27.0556, + "eval_rouge1_for_task050_multirc_answerability_classification": 41.0, + "eval_rouge1_for_task102_commongen_data_to_text": 66.1549, + "eval_rouge1_for_task1152_bard_word_analogy": 12.0, + "eval_rouge1_for_task1153_bard_word_analogy": 12.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 14.0, + "eval_rouge1_for_task1155_bard_word_analogy": 58.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 28.0, + "eval_rouge1_for_task1158_bard_word_analogy": 8.0, + "eval_rouge1_for_task1159_bard_word_analogy": 12.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 33.0185, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.9611, + "eval_rouge1_for_task121_zest_question_rewriting": 50.7363, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.4269, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.1449, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.487, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.9687, + "eval_rouge1_for_task1385_anli_textual_entailment": 26.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 33.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 8.4, + "eval_rouge1_for_task1407_dart_data_to_text": 26.4931, + "eval_rouge1_for_task1409_dart_data_to_text": 45.1458, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 45.1903, + "eval_rouge1_for_task1439_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 9.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 29.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 31.2666, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0656, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.1602, + "eval_rouge1_for_task1586_scifact_title_generation": 32.2149, + "eval_rouge1_for_task1598_nyc_data_to_text": 40.3807, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 83.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.6082, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 67.8915, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rouge1_for_task1659_billsum_title_generation": 32.4203, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.5143, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 56.1998, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 20.6347, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 47.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 65.0571, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 39.3256, + "eval_rouge1_for_task288_gigaword_title_generation": 26.8662, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 58.8667, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 47.0333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 39.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.1602, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 34.1667, + "eval_rouge1_for_task402_grailqa_question_rewriting": 74.6037, + "eval_rouge1_for_task418_persent_title_generation": 25.4993, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.4119, + "eval_rouge1_for_task500_scruples_title_generation": 14.6008, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 35.862, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 21.3782, + "eval_rouge1_for_task602_wikitext_title_generation": 11.0354, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 27.0008, + "eval_rouge1_for_task619_ohsumed_title_generation": 35.0006, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.1333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.1619, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 20.9333, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.2004, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 67.4716, + "eval_rouge1_for_task677_ollie_data_to_text": 22.5702, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 57.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.7344, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.5692, + "eval_rouge1_for_task769_qed_title_generation": 77.9835, + "eval_rouge1_for_task827_copa_cause_effect_classification": 51.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 55.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 43.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 48.2857, + "eval_rouge1_for_task892_gap_coreference_resolution": 13.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 28.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 52.3653, + "eval_rouge1_for_task970_sherliic_textual_entailment": 51.0, + "eval_rouge1_for_textual_entailment": 42.4861, + "eval_rouge1_for_title_generation": 30.2484, + "eval_rouge1_for_word_analogy": 20.9583, + "eval_rougeL": 43.4671, + "eval_rougeL_for_answerability_classification": 52.7949, + "eval_rougeL_for_cause_effect_classification": 54.2617, + "eval_rougeL_for_coreference_resolution": 43.0493, + "eval_rougeL_for_data_to_text": 38.4383, + "eval_rougeL_for_dialogue_act_recognition": 40.9619, + "eval_rougeL_for_grammar_error_correction": 64.6741, + "eval_rougeL_for_keyword_tagging": 56.0055, + "eval_rougeL_for_overlap_extraction": 32.4363, + "eval_rougeL_for_question_rewriting": 67.6755, + "eval_rougeL_for_task020_mctaco_answerability_classification": 47.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 38.8333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.9215, + "eval_rougeL_for_task035_winogrande_question_rewriting": 85.4648, + "eval_rougeL_for_task036_qasc_keyword_tagging": 64.1825, + "eval_rougeL_for_task039_qasc_overlap_extraction": 27.0556, + "eval_rougeL_for_task050_multirc_answerability_classification": 41.0, + "eval_rougeL_for_task102_commongen_data_to_text": 56.695, + "eval_rougeL_for_task1152_bard_word_analogy": 12.0, + "eval_rougeL_for_task1153_bard_word_analogy": 12.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 14.0, + "eval_rougeL_for_task1155_bard_word_analogy": 58.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 28.0, + "eval_rougeL_for_task1158_bard_word_analogy": 8.0, + "eval_rougeL_for_task1159_bard_word_analogy": 12.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 27.6377, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.8038, + "eval_rougeL_for_task121_zest_question_rewriting": 45.2114, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.6985, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.2722, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.9988, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.0346, + "eval_rougeL_for_task1385_anli_textual_entailment": 26.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 33.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 8.4, + "eval_rougeL_for_task1407_dart_data_to_text": 22.6403, + "eval_rougeL_for_task1409_dart_data_to_text": 38.0375, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 43.1889, + "eval_rougeL_for_task1439_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 9.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 29.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 30.1426, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.1593, + "eval_rougeL_for_task1562_zest_question_rewriting": 50.9217, + "eval_rougeL_for_task1586_scifact_title_generation": 26.7757, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.9577, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 83.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.3113, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 50.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 57.8665, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 57.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.2375, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 62.5143, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 48.2626, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 20.6347, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 47.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 47.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 65.0571, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 37.817, + "eval_rougeL_for_task288_gigaword_title_generation": 22.9404, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 58.8667, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 47.0333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 39.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.1497, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 34.1667, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.8402, + "eval_rougeL_for_task418_persent_title_generation": 22.5841, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.5249, + "eval_rougeL_for_task500_scruples_title_generation": 13.4757, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 35.1889, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 63.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 20.7655, + "eval_rougeL_for_task602_wikitext_title_generation": 10.828, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 24.0157, + "eval_rougeL_for_task619_ohsumed_title_generation": 31.6658, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.35, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.1619, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 20.9333, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 77.9267, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 66.2315, + "eval_rougeL_for_task677_ollie_data_to_text": 19.6558, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 57.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.972, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.1169, + "eval_rougeL_for_task769_qed_title_generation": 77.9835, + "eval_rougeL_for_task827_copa_cause_effect_classification": 51.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 55.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 43.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 48.2857, + "eval_rougeL_for_task892_gap_coreference_resolution": 13.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 28.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.5748, + "eval_rougeL_for_task970_sherliic_textual_entailment": 51.0, + "eval_rougeL_for_textual_entailment": 42.4861, + "eval_rougeL_for_title_generation": 27.9054, + "eval_rougeL_for_word_analogy": 20.9583, + "eval_runtime": 403.9741, + "eval_samples_per_second": 29.482, + "eval_steps_per_second": 0.923, + "step": 6000 + }, + { + "epoch": 1.49, + "learning_rate": 5e-05, + "loss": 1.358, + "step": 6500 + }, + { + "epoch": 1.49, + "eval_exact_match": 26.3644, + "eval_exact_match_for_answerability_classification": 50.9231, + "eval_exact_match_for_cause_effect_classification": 35.8571, + "eval_exact_match_for_coreference_resolution": 34.1429, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 36.4286, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 39.4, + "eval_exact_match_for_overlap_extraction": 12.5, + "eval_exact_match_for_question_rewriting": 1.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 39.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 33.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 25.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 50.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 13.0, + "eval_exact_match_for_task1153_bard_word_analogy": 6.0, + "eval_exact_match_for_task1154_bard_word_analogy": 9.0, + "eval_exact_match_for_task1155_bard_word_analogy": 55.0, + "eval_exact_match_for_task1156_bard_word_analogy": 24.0, + "eval_exact_match_for_task1157_bard_word_analogy": 33.0, + "eval_exact_match_for_task1158_bard_word_analogy": 9.0, + "eval_exact_match_for_task1159_bard_word_analogy": 16.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 0.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 26.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 56.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 54.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 13.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 43.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 18.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 30.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 50.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 48.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 17.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 46.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 49.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 25.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 3.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 48.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 34.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 40.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 87.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 14.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 54.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 61.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 40.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 19.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 24.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 39.75, + "eval_exact_match_for_title_generation": 6.9507, + "eval_exact_match_for_word_analogy": 20.625, + "eval_f1": 43.1979, + "eval_f1_for_answerability_classification": 53.4872, + "eval_f1_for_cause_effect_classification": 54.0227, + "eval_f1_for_coreference_resolution": 43.4189, + "eval_f1_for_data_to_text": 41.1668, + "eval_f1_for_dialogue_act_recognition": 40.0, + "eval_f1_for_grammar_error_correction": 60.2811, + "eval_f1_for_keyword_tagging": 52.617, + "eval_f1_for_overlap_extraction": 34.3553, + "eval_f1_for_question_rewriting": 69.4977, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.8333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.8166, + "eval_f1_for_task035_winogrande_question_rewriting": 84.6783, + "eval_f1_for_task036_qasc_keyword_tagging": 69.0709, + "eval_f1_for_task039_qasc_overlap_extraction": 32.1667, + "eval_f1_for_task050_multirc_answerability_classification": 50.0, + "eval_f1_for_task102_commongen_data_to_text": 51.4027, + "eval_f1_for_task1152_bard_word_analogy": 13.0, + "eval_f1_for_task1153_bard_word_analogy": 6.6667, + "eval_f1_for_task1154_bard_word_analogy": 9.0, + "eval_f1_for_task1155_bard_word_analogy": 55.0, + "eval_f1_for_task1156_bard_word_analogy": 24.0, + "eval_f1_for_task1157_bard_word_analogy": 33.0, + "eval_f1_for_task1158_bard_word_analogy": 9.0, + "eval_f1_for_task1159_bard_word_analogy": 16.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.435, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.5505, + "eval_f1_for_task121_zest_question_rewriting": 48.9145, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.0367, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.6917, + "eval_f1_for_task1356_xlsum_title_generation": 14.9562, + "eval_f1_for_task1358_xlsum_title_generation": 30.9575, + "eval_f1_for_task1385_anli_textual_entailment": 34.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 33.0, + "eval_f1_for_task1388_cb_textual_entailment": 26.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 6.0, + "eval_f1_for_task1407_dart_data_to_text": 29.066, + "eval_f1_for_task1409_dart_data_to_text": 42.1827, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.3106, + "eval_f1_for_task1439_doqa_answerability_classification": 56.0, + "eval_f1_for_task1442_doqa_answerability_classification": 54.0, + "eval_f1_for_task1516_imppres_textual_entailment": 13.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 28.4449, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2517, + "eval_f1_for_task1562_zest_question_rewriting": 55.3479, + "eval_f1_for_task1586_scifact_title_generation": 26.9896, + "eval_f1_for_task1598_nyc_data_to_text": 36.9252, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 43.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.4256, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_f1_for_task1631_open_pi_data_to_text": 68.9499, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_f1_for_task1659_billsum_title_generation": 30.3587, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 64.8286, + "eval_f1_for_task1728_web_nlg_data_to_text": 41.1727, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 30.0, + "eval_f1_for_task219_rocstories_title_generation": 17.0306, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 58.4238, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 36.544, + "eval_f1_for_task288_gigaword_title_generation": 25.106, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 56.941, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 52.919, + "eval_f1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.1403, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 31.5, + "eval_f1_for_task402_grailqa_question_rewriting": 71.7156, + "eval_f1_for_task418_persent_title_generation": 20.4769, + "eval_f1_for_task442_com_qa_question_rewriting": 68.166, + "eval_f1_for_task500_scruples_title_generation": 12.4165, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 33.7148, + "eval_f1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 20.5734, + "eval_f1_for_task602_wikitext_title_generation": 11.0448, + "eval_f1_for_task613_liar_keyword_tagging": 14.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 26.3516, + "eval_f1_for_task619_ohsumed_title_generation": 34.6917, + "eval_f1_for_task620_ohsumed_keyword_tagging": 35.3667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 34.0, + "eval_f1_for_task642_e_snli_textual_entailment": 40.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.3143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 22.6, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.2483, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.9198, + "eval_f1_for_task677_ollie_data_to_text": 19.1531, + "eval_f1_for_task738_perspectrum_textual_entailment": 54.0, + "eval_f1_for_task743_eurlex_title_generation": 30.527, + "eval_f1_for_task760_msr_sqa_data_to_text": 4.6625, + "eval_f1_for_task769_qed_title_generation": 83.9836, + "eval_f1_for_task827_copa_cause_effect_classification": 48.0, + "eval_f1_for_task828_copa_cause_effect_classification": 53.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 40.0, + "eval_f1_for_task891_gap_coreference_resolution": 52.819, + "eval_f1_for_task892_gap_coreference_resolution": 19.0, + "eval_f1_for_task893_gap_coreference_resolution": 24.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_f1_for_task957_e2e_data_to_text": 49.9729, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 39.75, + "eval_f1_for_title_generation": 28.418, + "eval_f1_for_word_analogy": 20.7083, + "eval_gen_len": 10.3907, + "eval_global_step": 6500, + "eval_loss": 1.4778538942337036, + "eval_rouge1": 44.7681, + "eval_rouge1_for_answerability_classification": 53.4872, + "eval_rouge1_for_cause_effect_classification": 54.2772, + "eval_rouge1_for_coreference_resolution": 44.2803, + "eval_rouge1_for_data_to_text": 45.2434, + "eval_rouge1_for_dialogue_act_recognition": 42.119, + "eval_rouge1_for_grammar_error_correction": 65.4801, + "eval_rouge1_for_keyword_tagging": 56.9929, + "eval_rouge1_for_overlap_extraction": 35.6771, + "eval_rouge1_for_question_rewriting": 70.9945, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 43.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.7934, + "eval_rouge1_for_task035_winogrande_question_rewriting": 85.625, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.5834, + "eval_rouge1_for_task039_qasc_overlap_extraction": 34.0, + "eval_rouge1_for_task050_multirc_answerability_classification": 50.0, + "eval_rouge1_for_task102_commongen_data_to_text": 62.6771, + "eval_rouge1_for_task1152_bard_word_analogy": 13.0, + "eval_rouge1_for_task1153_bard_word_analogy": 6.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 9.0, + "eval_rouge1_for_task1155_bard_word_analogy": 55.0, + "eval_rouge1_for_task1156_bard_word_analogy": 24.0, + "eval_rouge1_for_task1157_bard_word_analogy": 33.0, + "eval_rouge1_for_task1158_bard_word_analogy": 9.0, + "eval_rouge1_for_task1159_bard_word_analogy": 16.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.7867, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.8082, + "eval_rouge1_for_task121_zest_question_rewriting": 50.6307, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.2642, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.2668, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.1086, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.3695, + "eval_rouge1_for_task1385_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 26.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 7.5, + "eval_rouge1_for_task1407_dart_data_to_text": 33.3825, + "eval_rouge1_for_task1409_dart_data_to_text": 42.6044, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 44.4116, + "eval_rouge1_for_task1439_doqa_answerability_classification": 56.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 54.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 13.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 31.1025, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5485, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.1138, + "eval_rouge1_for_task1586_scifact_title_generation": 30.4752, + "eval_rouge1_for_task1598_nyc_data_to_text": 39.156, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 81.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.6983, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 69.613, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_rouge1_for_task1659_billsum_title_generation": 31.937, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 64.8286, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 54.3961, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 30.0, + "eval_rouge1_for_task219_rocstories_title_generation": 20.6661, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 59.0905, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 37.3542, + "eval_rouge1_for_task288_gigaword_title_generation": 27.7506, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 58.319, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 52.8667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.5917, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 38.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 73.7649, + "eval_rouge1_for_task418_persent_title_generation": 22.9546, + "eval_rouge1_for_task442_com_qa_question_rewriting": 71.4888, + "eval_rouge1_for_task500_scruples_title_generation": 14.1539, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.8196, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 23.5438, + "eval_rouge1_for_task602_wikitext_title_generation": 11.6771, + "eval_rouge1_for_task613_liar_keyword_tagging": 28.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 27.6821, + "eval_rouge1_for_task619_ohsumed_title_generation": 37.1979, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 40.5667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 34.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 40.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.8143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 22.6, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.1122, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 66.6377, + "eval_rouge1_for_task677_ollie_data_to_text": 20.6745, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 54.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.294, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 4.807, + "eval_rouge1_for_task769_qed_title_generation": 84.024, + "eval_rouge1_for_task827_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 40.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.0524, + "eval_rouge1_for_task892_gap_coreference_resolution": 19.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 24.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rouge1_for_task957_e2e_data_to_text": 49.957, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 41.3333, + "eval_rouge1_for_title_generation": 30.3956, + "eval_rouge1_for_word_analogy": 20.7083, + "eval_rougeL": 43.4974, + "eval_rougeL_for_answerability_classification": 53.4872, + "eval_rougeL_for_cause_effect_classification": 53.588, + "eval_rougeL_for_coreference_resolution": 44.2803, + "eval_rougeL_for_data_to_text": 38.1033, + "eval_rougeL_for_dialogue_act_recognition": 42.119, + "eval_rougeL_for_grammar_error_correction": 64.2925, + "eval_rougeL_for_keyword_tagging": 56.47, + "eval_rougeL_for_overlap_extraction": 34.8538, + "eval_rougeL_for_question_rewriting": 67.4658, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 43.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.7934, + "eval_rougeL_for_task035_winogrande_question_rewriting": 84.8023, + "eval_rougeL_for_task036_qasc_keyword_tagging": 70.3525, + "eval_rougeL_for_task039_qasc_overlap_extraction": 34.0, + "eval_rougeL_for_task050_multirc_answerability_classification": 50.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.52, + "eval_rougeL_for_task1152_bard_word_analogy": 13.0, + "eval_rougeL_for_task1153_bard_word_analogy": 6.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 9.0, + "eval_rougeL_for_task1155_bard_word_analogy": 55.0, + "eval_rougeL_for_task1156_bard_word_analogy": 24.0, + "eval_rougeL_for_task1157_bard_word_analogy": 33.0, + "eval_rougeL_for_task1158_bard_word_analogy": 9.0, + "eval_rougeL_for_task1159_bard_word_analogy": 16.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.6254, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.6509, + "eval_rougeL_for_task121_zest_question_rewriting": 45.28, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.7829, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.143, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.2253, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.8641, + "eval_rougeL_for_task1385_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 26.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 51.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 7.5, + "eval_rougeL_for_task1407_dart_data_to_text": 28.7085, + "eval_rougeL_for_task1409_dart_data_to_text": 35.8842, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.9552, + "eval_rougeL_for_task1439_doqa_answerability_classification": 56.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 54.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 13.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 37.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 29.5679, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6298, + "eval_rougeL_for_task1562_zest_question_rewriting": 49.8745, + "eval_rougeL_for_task1586_scifact_title_generation": 25.5217, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.9108, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 81.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.2904, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 56.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 56.3993, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 46.0, + "eval_rougeL_for_task1659_billsum_title_generation": 27.5032, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 64.8286, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 47.3337, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 30.0, + "eval_rougeL_for_task219_rocstories_title_generation": 20.3105, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 50.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 50.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 59.0905, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 35.7076, + "eval_rougeL_for_task288_gigaword_title_generation": 23.8139, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 58.319, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 52.8667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.1384, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 38.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 61.6554, + "eval_rougeL_for_task418_persent_title_generation": 19.7614, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.2113, + "eval_rougeL_for_task500_scruples_title_generation": 13.1702, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 33.195, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 48.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 23.0373, + "eval_rougeL_for_task602_wikitext_title_generation": 11.4697, + "eval_rougeL_for_task613_liar_keyword_tagging": 28.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 24.3107, + "eval_rougeL_for_task619_ohsumed_title_generation": 33.9217, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.1833, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 50.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 34.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 40.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.8143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 22.6, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.1619, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 65.2612, + "eval_rougeL_for_task677_ollie_data_to_text": 17.4214, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 54.0, + "eval_rougeL_for_task743_eurlex_title_generation": 27.2136, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 4.2222, + "eval_rougeL_for_task769_qed_title_generation": 84.024, + "eval_rougeL_for_task827_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 42.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 40.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.0524, + "eval_rougeL_for_task892_gap_coreference_resolution": 19.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 24.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 43.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.4577, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 41.3333, + "eval_rougeL_for_title_generation": 28.0366, + "eval_rougeL_for_word_analogy": 20.7083, + "eval_runtime": 422.486, + "eval_samples_per_second": 28.19, + "eval_steps_per_second": 0.883, + "step": 6500 + }, + { + "epoch": 1.6, + "learning_rate": 5e-05, + "loss": 1.3687, + "step": 7000 + }, + { + "epoch": 1.6, + "eval_exact_match": 26.5659, + "eval_exact_match_for_answerability_classification": 51.6154, + "eval_exact_match_for_cause_effect_classification": 36.2857, + "eval_exact_match_for_coreference_resolution": 33.9286, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 36.0, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 37.2, + "eval_exact_match_for_overlap_extraction": 11.5, + "eval_exact_match_for_question_rewriting": 1.4545, + "eval_exact_match_for_task020_mctaco_answerability_classification": 52.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 40.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 23.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 23.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 53.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 15.0, + "eval_exact_match_for_task1153_bard_word_analogy": 6.0, + "eval_exact_match_for_task1154_bard_word_analogy": 13.0, + "eval_exact_match_for_task1155_bard_word_analogy": 57.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 29.0, + "eval_exact_match_for_task1158_bard_word_analogy": 6.0, + "eval_exact_match_for_task1159_bard_word_analogy": 14.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 53.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 27.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 37.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 8.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 60.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 28.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 35.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 62.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 20.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 3.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 48.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 51.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 39.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 14.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 31.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 49.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 49.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 23.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 7.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 42.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 1.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 14.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 14.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 40.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 89.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 7.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 55.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 61.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 48.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 43.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 41.0417, + "eval_exact_match_for_title_generation": 6.7825, + "eval_exact_match_for_word_analogy": 20.375, + "eval_f1": 43.4008, + "eval_f1_for_answerability_classification": 54.1795, + "eval_f1_for_cause_effect_classification": 54.1822, + "eval_f1_for_coreference_resolution": 42.999, + "eval_f1_for_data_to_text": 41.9347, + "eval_f1_for_dialogue_act_recognition": 39.5714, + "eval_f1_for_grammar_error_correction": 60.3477, + "eval_f1_for_keyword_tagging": 50.9196, + "eval_f1_for_overlap_extraction": 31.8974, + "eval_f1_for_question_rewriting": 70.4109, + "eval_f1_for_task020_mctaco_answerability_classification": 52.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_f1_for_task034_winogrande_question_rewriting": 92.4913, + "eval_f1_for_task035_winogrande_question_rewriting": 86.457, + "eval_f1_for_task036_qasc_keyword_tagging": 63.8835, + "eval_f1_for_task039_qasc_overlap_extraction": 30.8333, + "eval_f1_for_task050_multirc_answerability_classification": 53.0, + "eval_f1_for_task102_commongen_data_to_text": 50.8563, + "eval_f1_for_task1152_bard_word_analogy": 15.0, + "eval_f1_for_task1153_bard_word_analogy": 6.6667, + "eval_f1_for_task1154_bard_word_analogy": 13.0, + "eval_f1_for_task1155_bard_word_analogy": 57.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 29.0, + "eval_f1_for_task1158_bard_word_analogy": 6.0, + "eval_f1_for_task1159_bard_word_analogy": 14.0, + "eval_f1_for_task1161_coda_19_title_generation": 26.3891, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.8816, + "eval_f1_for_task121_zest_question_rewriting": 50.494, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.8794, + "eval_f1_for_task1344_rte_textual_entailment": 53.0, + "eval_f1_for_task1345_qqp_question_rewriting": 39.6769, + "eval_f1_for_task1356_xlsum_title_generation": 14.3205, + "eval_f1_for_task1358_xlsum_title_generation": 30.703, + "eval_f1_for_task1385_anli_textual_entailment": 27.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 37.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 8.0, + "eval_f1_for_task1407_dart_data_to_text": 25.2577, + "eval_f1_for_task1409_dart_data_to_text": 43.0192, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 37.4437, + "eval_f1_for_task1439_doqa_answerability_classification": 60.0, + "eval_f1_for_task1442_doqa_answerability_classification": 55.0, + "eval_f1_for_task1516_imppres_textual_entailment": 28.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_f1_for_task1540_peer_read_title_generation": 27.361, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.2517, + "eval_f1_for_task1562_zest_question_rewriting": 55.407, + "eval_f1_for_task1586_scifact_title_generation": 26.6892, + "eval_f1_for_task1598_nyc_data_to_text": 39.1851, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 35.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.2634, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 62.0, + "eval_f1_for_task1631_open_pi_data_to_text": 71.2335, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_f1_for_task1659_billsum_title_generation": 30.2225, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 68.8476, + "eval_f1_for_task1728_web_nlg_data_to_text": 46.4611, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 31.0, + "eval_f1_for_task201_multinli_textual_entailment": 33.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 15.051, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 48.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 45.2333, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 32.9615, + "eval_f1_for_task288_gigaword_title_generation": 24.4362, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 58.9333, + "eval_f1_for_task329_gap_coreference_resolution": 31.0, + "eval_f1_for_task330_gap_coreference_resolution": 55.919, + "eval_f1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.6661, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 30.0, + "eval_f1_for_task402_grailqa_question_rewriting": 74.6881, + "eval_f1_for_task418_persent_title_generation": 20.0875, + "eval_f1_for_task442_com_qa_question_rewriting": 67.3047, + "eval_f1_for_task500_scruples_title_generation": 11.7018, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.2296, + "eval_f1_for_task520_aquamuse_answerability_classification": 42.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 22.0868, + "eval_f1_for_task602_wikitext_title_generation": 11.11, + "eval_f1_for_task613_liar_keyword_tagging": 14.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.9428, + "eval_f1_for_task619_ohsumed_title_generation": 34.1286, + "eval_f1_for_task620_ohsumed_keyword_tagging": 34.9, + "eval_f1_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 32.0, + "eval_f1_for_task642_e_snli_textual_entailment": 40.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 95.1476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 10.0, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.7371, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.1189, + "eval_f1_for_task677_ollie_data_to_text": 18.8175, + "eval_f1_for_task738_perspectrum_textual_entailment": 55.0, + "eval_f1_for_task743_eurlex_title_generation": 28.2144, + "eval_f1_for_task760_msr_sqa_data_to_text": 2.8923, + "eval_f1_for_task769_qed_title_generation": 83.9504, + "eval_f1_for_task827_copa_cause_effect_classification": 48.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 43.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.719, + "eval_f1_for_task892_gap_coreference_resolution": 33.0, + "eval_f1_for_task893_gap_coreference_resolution": 27.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 50.7982, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 41.0417, + "eval_f1_for_title_generation": 27.9027, + "eval_f1_for_word_analogy": 20.4583, + "eval_gen_len": 10.8737, + "eval_global_step": 7000, + "eval_loss": 1.4898345470428467, + "eval_rouge1": 45.0008, + "eval_rouge1_for_answerability_classification": 54.1795, + "eval_rouge1_for_cause_effect_classification": 54.416, + "eval_rouge1_for_coreference_resolution": 43.469, + "eval_rouge1_for_data_to_text": 45.8912, + "eval_rouge1_for_dialogue_act_recognition": 41.6905, + "eval_rouge1_for_grammar_error_correction": 65.7012, + "eval_rouge1_for_keyword_tagging": 56.0119, + "eval_rouge1_for_overlap_extraction": 33.5914, + "eval_rouge1_for_question_rewriting": 71.8739, + "eval_rouge1_for_task020_mctaco_answerability_classification": 52.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.4685, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.3974, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.5118, + "eval_rouge1_for_task039_qasc_overlap_extraction": 33.1667, + "eval_rouge1_for_task050_multirc_answerability_classification": 53.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.8473, + "eval_rouge1_for_task1152_bard_word_analogy": 15.0, + "eval_rouge1_for_task1153_bard_word_analogy": 6.6667, + "eval_rouge1_for_task1154_bard_word_analogy": 13.0, + "eval_rouge1_for_task1155_bard_word_analogy": 57.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 29.0, + "eval_rouge1_for_task1158_bard_word_analogy": 6.0, + "eval_rouge1_for_task1159_bard_word_analogy": 14.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.0344, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 80.1191, + "eval_rouge1_for_task121_zest_question_rewriting": 52.064, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 11.2446, + "eval_rouge1_for_task1344_rte_textual_entailment": 53.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.2735, + "eval_rouge1_for_task1356_xlsum_title_generation": 16.9298, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.6579, + "eval_rouge1_for_task1385_anli_textual_entailment": 27.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 37.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 9.5, + "eval_rouge1_for_task1407_dart_data_to_text": 27.9023, + "eval_rouge1_for_task1409_dart_data_to_text": 43.6163, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 44.854, + "eval_rouge1_for_task1439_doqa_answerability_classification": 60.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 28.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 30.133, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.5485, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.3597, + "eval_rouge1_for_task1586_scifact_title_generation": 29.7725, + "eval_rouge1_for_task1598_nyc_data_to_text": 42.0431, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 78.3333, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.5361, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 62.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 72.1824, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1659_billsum_title_generation": 31.5935, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 68.8476, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.4553, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.4737, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 48.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 45.4, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 34.0162, + "eval_rouge1_for_task288_gigaword_title_generation": 27.124, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 59.8333, + "eval_rouge1_for_task329_gap_coreference_resolution": 31.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 55.8667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.9112, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 35.3333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 76.5725, + "eval_rouge1_for_task418_persent_title_generation": 22.8509, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.9163, + "eval_rouge1_for_task500_scruples_title_generation": 13.3133, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 32.3907, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 42.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 24.2614, + "eval_rouge1_for_task602_wikitext_title_generation": 11.7807, + "eval_rouge1_for_task613_liar_keyword_tagging": 30.3333, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 26.334, + "eval_rouge1_for_task619_ohsumed_title_generation": 36.6044, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 40.5667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 40.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.6476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 10.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.3297, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 68.5759, + "eval_rouge1_for_task677_ollie_data_to_text": 21.2576, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 55.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.344, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.0473, + "eval_rouge1_for_task769_qed_title_generation": 84.0023, + "eval_rouge1_for_task827_copa_cause_effect_classification": 48.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 43.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.9524, + "eval_rouge1_for_task892_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.9644, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.8472, + "eval_rouge1_for_title_generation": 29.9118, + "eval_rouge1_for_word_analogy": 20.4583, + "eval_rougeL": 43.702, + "eval_rougeL_for_answerability_classification": 54.1795, + "eval_rougeL_for_cause_effect_classification": 53.8087, + "eval_rougeL_for_coreference_resolution": 43.469, + "eval_rougeL_for_data_to_text": 38.2752, + "eval_rougeL_for_dialogue_act_recognition": 41.6905, + "eval_rougeL_for_grammar_error_correction": 64.3077, + "eval_rougeL_for_keyword_tagging": 55.3819, + "eval_rougeL_for_overlap_extraction": 32.6692, + "eval_rougeL_for_question_rewriting": 68.4804, + "eval_rougeL_for_task020_mctaco_answerability_classification": 52.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 41.3333, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.4685, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.6577, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.095, + "eval_rougeL_for_task039_qasc_overlap_extraction": 33.1667, + "eval_rougeL_for_task050_multirc_answerability_classification": 53.0, + "eval_rougeL_for_task102_commongen_data_to_text": 52.5863, + "eval_rougeL_for_task1152_bard_word_analogy": 15.0, + "eval_rougeL_for_task1153_bard_word_analogy": 6.6667, + "eval_rougeL_for_task1154_bard_word_analogy": 13.0, + "eval_rougeL_for_task1155_bard_word_analogy": 57.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 29.0, + "eval_rougeL_for_task1158_bard_word_analogy": 6.0, + "eval_rougeL_for_task1159_bard_word_analogy": 14.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.0238, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0173, + "eval_rougeL_for_task121_zest_question_rewriting": 47.0075, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 10.6407, + "eval_rougeL_for_task1344_rte_textual_entailment": 53.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.4008, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.2595, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.1874, + "eval_rougeL_for_task1385_anli_textual_entailment": 27.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 37.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 51.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 46.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 9.5, + "eval_rougeL_for_task1407_dart_data_to_text": 24.3579, + "eval_rougeL_for_task1409_dart_data_to_text": 37.0691, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.9856, + "eval_rougeL_for_task1439_doqa_answerability_classification": 60.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 28.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 28.152, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.6298, + "eval_rougeL_for_task1562_zest_question_rewriting": 50.7179, + "eval_rougeL_for_task1586_scifact_title_generation": 24.6032, + "eval_rougeL_for_task1598_nyc_data_to_text": 32.5695, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 78.3333, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.1282, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 62.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 61.2963, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1659_billsum_title_generation": 27.0197, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 68.8476, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 48.4897, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.2515, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 48.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 51.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 45.4, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 32.1717, + "eval_rougeL_for_task288_gigaword_title_generation": 23.1354, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 59.8333, + "eval_rougeL_for_task329_gap_coreference_resolution": 31.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 55.8667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 49.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.5359, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 35.3333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 64.2138, + "eval_rougeL_for_task418_persent_title_generation": 19.705, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.8353, + "eval_rougeL_for_task500_scruples_title_generation": 12.548, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 31.7575, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 42.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 23.4308, + "eval_rougeL_for_task602_wikitext_title_generation": 11.6306, + "eval_rougeL_for_task613_liar_keyword_tagging": 30.3333, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 23.4582, + "eval_rougeL_for_task619_ohsumed_title_generation": 33.1045, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.8333, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 46.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 40.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.6476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 10.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.1639, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.6731, + "eval_rougeL_for_task677_ollie_data_to_text": 17.7206, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 55.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.6198, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 2.8836, + "eval_rougeL_for_task769_qed_title_generation": 84.0023, + "eval_rougeL_for_task827_copa_cause_effect_classification": 48.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 46.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 43.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.9524, + "eval_rougeL_for_task892_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.3138, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.8472, + "eval_rougeL_for_title_generation": 27.5342, + "eval_rougeL_for_word_analogy": 20.4583, + "eval_runtime": 385.4673, + "eval_samples_per_second": 30.898, + "eval_steps_per_second": 0.968, + "step": 7000 + }, + { + "epoch": 1.71, + "learning_rate": 5e-05, + "loss": 1.335, + "step": 7500 + }, + { + "epoch": 1.71, + "eval_exact_match": 25.8186, + "eval_exact_match_for_answerability_classification": 50.1538, + "eval_exact_match_for_cause_effect_classification": 36.1429, + "eval_exact_match_for_coreference_resolution": 33.8571, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 35.0, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 40.0, + "eval_exact_match_for_overlap_extraction": 14.0, + "eval_exact_match_for_question_rewriting": 0.8182, + "eval_exact_match_for_task020_mctaco_answerability_classification": 48.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 37.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 32.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 28.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 14.0, + "eval_exact_match_for_task1153_bard_word_analogy": 7.0, + "eval_exact_match_for_task1154_bard_word_analogy": 14.0, + "eval_exact_match_for_task1155_bard_word_analogy": 54.0, + "eval_exact_match_for_task1156_bard_word_analogy": 23.0, + "eval_exact_match_for_task1157_bard_word_analogy": 30.0, + "eval_exact_match_for_task1158_bard_word_analogy": 10.0, + "eval_exact_match_for_task1159_bard_word_analogy": 13.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 1.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 48.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 2.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 3.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 31.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 49.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 1.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 57.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 31.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 40.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 3.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_exact_match_for_task1659_billsum_title_generation": 3.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 21.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 2.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task219_rocstories_title_generation": 1.0, + "eval_exact_match_for_task220_rocstories_title_generation": 46.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 46.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 52.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 40.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 14.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 38.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 2.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 1.0, + "eval_exact_match_for_task500_scruples_title_generation": 1.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 1.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 50.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 15.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 32.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 50.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 8.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 54.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 66.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 54.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 39.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 45.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task957_e2e_data_to_text": 0.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 62.0, + "eval_exact_match_for_textual_entailment": 37.75, + "eval_exact_match_for_title_generation": 7.1188, + "eval_exact_match_for_word_analogy": 20.625, + "eval_f1": 42.5195, + "eval_f1_for_answerability_classification": 52.7179, + "eval_f1_for_cause_effect_classification": 54.1263, + "eval_f1_for_coreference_resolution": 42.4451, + "eval_f1_for_data_to_text": 41.9188, + "eval_f1_for_dialogue_act_recognition": 38.5714, + "eval_f1_for_grammar_error_correction": 61.9719, + "eval_f1_for_keyword_tagging": 52.302, + "eval_f1_for_overlap_extraction": 31.8769, + "eval_f1_for_question_rewriting": 69.3446, + "eval_f1_for_task020_mctaco_answerability_classification": 48.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 39.5, + "eval_f1_for_task034_winogrande_question_rewriting": 92.8622, + "eval_f1_for_task035_winogrande_question_rewriting": 86.2231, + "eval_f1_for_task036_qasc_keyword_tagging": 67.3957, + "eval_f1_for_task039_qasc_overlap_extraction": 32.6667, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 51.4832, + "eval_f1_for_task1152_bard_word_analogy": 14.0, + "eval_f1_for_task1153_bard_word_analogy": 13.0, + "eval_f1_for_task1154_bard_word_analogy": 14.0, + "eval_f1_for_task1155_bard_word_analogy": 54.0, + "eval_f1_for_task1156_bard_word_analogy": 23.0, + "eval_f1_for_task1157_bard_word_analogy": 30.0, + "eval_f1_for_task1158_bard_word_analogy": 10.0, + "eval_f1_for_task1159_bard_word_analogy": 13.0, + "eval_f1_for_task1161_coda_19_title_generation": 27.0709, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.1415, + "eval_f1_for_task121_zest_question_rewriting": 46.5939, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 8.8915, + "eval_f1_for_task1344_rte_textual_entailment": 48.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.4116, + "eval_f1_for_task1356_xlsum_title_generation": 14.3734, + "eval_f1_for_task1358_xlsum_title_generation": 30.9498, + "eval_f1_for_task1385_anli_textual_entailment": 2.0, + "eval_f1_for_task1386_anli_textual_entailment": 2.0, + "eval_f1_for_task1387_anli_textual_entailment": 3.0, + "eval_f1_for_task1388_cb_textual_entailment": 31.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 1.0, + "eval_f1_for_task1407_dart_data_to_text": 26.7614, + "eval_f1_for_task1409_dart_data_to_text": 42.7442, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 40.2209, + "eval_f1_for_task1439_doqa_answerability_classification": 57.0, + "eval_f1_for_task1442_doqa_answerability_classification": 55.0, + "eval_f1_for_task1516_imppres_textual_entailment": 31.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 27.3023, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.723, + "eval_f1_for_task1562_zest_question_rewriting": 56.7447, + "eval_f1_for_task1586_scifact_title_generation": 28.1866, + "eval_f1_for_task1598_nyc_data_to_text": 39.2628, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 40.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.7348, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_f1_for_task1631_open_pi_data_to_text": 68.3973, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_f1_for_task1659_billsum_title_generation": 32.2502, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 65.4333, + "eval_f1_for_task1728_web_nlg_data_to_text": 47.8, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 31.0, + "eval_f1_for_task219_rocstories_title_generation": 14.6953, + "eval_f1_for_task220_rocstories_title_generation": 46.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 46.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 49.8571, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 31.0872, + "eval_f1_for_task288_gigaword_title_generation": 24.5057, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 55.9222, + "eval_f1_for_task329_gap_coreference_resolution": 30.0, + "eval_f1_for_task330_gap_coreference_resolution": 43.7333, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 34.2819, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 25.8333, + "eval_f1_for_task402_grailqa_question_rewriting": 68.5456, + "eval_f1_for_task418_persent_title_generation": 22.1123, + "eval_f1_for_task442_com_qa_question_rewriting": 66.4643, + "eval_f1_for_task500_scruples_title_generation": 12.177, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.1936, + "eval_f1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 20.059, + "eval_f1_for_task602_wikitext_title_generation": 12.5226, + "eval_f1_for_task613_liar_keyword_tagging": 15.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 24.9359, + "eval_f1_for_task619_ohsumed_title_generation": 33.8581, + "eval_f1_for_task620_ohsumed_keyword_tagging": 35.8, + "eval_f1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 32.0, + "eval_f1_for_task642_e_snli_textual_entailment": 50.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 93.6476, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 11.4667, + "eval_f1_for_task670_ambigqa_question_rewriting": 79.4492, + "eval_f1_for_task671_ambigqa_question_rewriting": 67.6194, + "eval_f1_for_task677_ollie_data_to_text": 20.4611, + "eval_f1_for_task738_perspectrum_textual_entailment": 54.0, + "eval_f1_for_task743_eurlex_title_generation": 30.1277, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.4767, + "eval_f1_for_task769_qed_title_generation": 85.0972, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 54.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 39.0, + "eval_f1_for_task891_gap_coreference_resolution": 53.4857, + "eval_f1_for_task892_gap_coreference_resolution": 35.0, + "eval_f1_for_task893_gap_coreference_resolution": 35.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task957_e2e_data_to_text": 48.4356, + "eval_f1_for_task970_sherliic_textual_entailment": 62.0, + "eval_f1_for_textual_entailment": 37.75, + "eval_f1_for_title_generation": 28.0476, + "eval_f1_for_word_analogy": 21.375, + "eval_gen_len": 10.0572, + "eval_global_step": 7500, + "eval_loss": 1.5025126934051514, + "eval_rouge1": 44.9287, + "eval_rouge1_for_answerability_classification": 52.7179, + "eval_rouge1_for_cause_effect_classification": 54.3561, + "eval_rouge1_for_coreference_resolution": 43.2475, + "eval_rouge1_for_data_to_text": 45.5608, + "eval_rouge1_for_dialogue_act_recognition": 40.619, + "eval_rouge1_for_grammar_error_correction": 65.4573, + "eval_rouge1_for_keyword_tagging": 57.334, + "eval_rouge1_for_overlap_extraction": 33.7495, + "eval_rouge1_for_question_rewriting": 70.9072, + "eval_rouge1_for_task020_mctaco_answerability_classification": 48.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.8384, + "eval_rouge1_for_task035_winogrande_question_rewriting": 86.9227, + "eval_rouge1_for_task036_qasc_keyword_tagging": 71.2224, + "eval_rouge1_for_task039_qasc_overlap_extraction": 35.5, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 64.4195, + "eval_rouge1_for_task1152_bard_word_analogy": 14.0, + "eval_rouge1_for_task1153_bard_word_analogy": 13.0, + "eval_rouge1_for_task1154_bard_word_analogy": 14.0, + "eval_rouge1_for_task1155_bard_word_analogy": 54.0, + "eval_rouge1_for_task1156_bard_word_analogy": 23.0, + "eval_rouge1_for_task1157_bard_word_analogy": 30.0, + "eval_rouge1_for_task1158_bard_word_analogy": 10.0, + "eval_rouge1_for_task1159_bard_word_analogy": 13.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 30.3547, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.4045, + "eval_rouge1_for_task121_zest_question_rewriting": 48.6693, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 9.1992, + "eval_rouge1_for_task1344_rte_textual_entailment": 48.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.778, + "eval_rouge1_for_task1356_xlsum_title_generation": 16.7419, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.4586, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rouge1_for_task1407_dart_data_to_text": 28.8235, + "eval_rouge1_for_task1409_dart_data_to_text": 43.0834, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 43.9107, + "eval_rouge1_for_task1439_doqa_answerability_classification": 57.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 31.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 29.5749, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 87.0039, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.9194, + "eval_rouge1_for_task1586_scifact_title_generation": 30.9326, + "eval_rouge1_for_task1598_nyc_data_to_text": 41.8619, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 80.0, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 79.0265, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 69.1946, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rouge1_for_task1659_billsum_title_generation": 33.8351, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 65.4333, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 56.7724, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.9152, + "eval_rouge1_for_task220_rocstories_title_generation": 46.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 46.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 50.0238, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 31.999, + "eval_rouge1_for_task288_gigaword_title_generation": 27.162, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 57.8222, + "eval_rouge1_for_task329_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 44.1667, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.6485, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 31.8333, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.5087, + "eval_rouge1_for_task418_persent_title_generation": 24.4509, + "eval_rouge1_for_task442_com_qa_question_rewriting": 69.9177, + "eval_rouge1_for_task500_scruples_title_generation": 14.1171, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 32.6629, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 23.5799, + "eval_rouge1_for_task602_wikitext_title_generation": 13.7025, + "eval_rouge1_for_task613_liar_keyword_tagging": 31.0, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 26.1777, + "eval_rouge1_for_task619_ohsumed_title_generation": 36.5179, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.3, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 32.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 50.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.1476, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 11.4667, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 80.4933, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 68.5008, + "eval_rouge1_for_task677_ollie_data_to_text": 22.6402, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 54.0, + "eval_rouge1_for_task743_eurlex_title_generation": 31.6604, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.6326, + "eval_rouge1_for_task769_qed_title_generation": 85.3186, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 54.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 39.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 53.719, + "eval_rouge1_for_task892_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 35.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task957_e2e_data_to_text": 48.5923, + "eval_rouge1_for_task970_sherliic_textual_entailment": 62.0, + "eval_rouge1_for_textual_entailment": 43.5833, + "eval_rouge1_for_title_generation": 30.1004, + "eval_rouge1_for_word_analogy": 21.375, + "eval_rougeL": 43.6158, + "eval_rougeL_for_answerability_classification": 52.7179, + "eval_rougeL_for_cause_effect_classification": 53.6659, + "eval_rougeL_for_coreference_resolution": 43.2475, + "eval_rougeL_for_data_to_text": 37.9051, + "eval_rougeL_for_dialogue_act_recognition": 40.619, + "eval_rougeL_for_grammar_error_correction": 64.1205, + "eval_rougeL_for_keyword_tagging": 56.5918, + "eval_rougeL_for_overlap_extraction": 32.9765, + "eval_rougeL_for_question_rewriting": 67.5419, + "eval_rougeL_for_task020_mctaco_answerability_classification": 48.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.8384, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.3954, + "eval_rougeL_for_task036_qasc_keyword_tagging": 69.3947, + "eval_rougeL_for_task039_qasc_overlap_extraction": 35.5, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 53.9731, + "eval_rougeL_for_task1152_bard_word_analogy": 14.0, + "eval_rougeL_for_task1153_bard_word_analogy": 13.0, + "eval_rougeL_for_task1154_bard_word_analogy": 14.0, + "eval_rougeL_for_task1155_bard_word_analogy": 54.0, + "eval_rougeL_for_task1156_bard_word_analogy": 23.0, + "eval_rougeL_for_task1157_bard_word_analogy": 30.0, + "eval_rougeL_for_task1158_bard_word_analogy": 10.0, + "eval_rougeL_for_task1159_bard_word_analogy": 13.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.4578, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.6885, + "eval_rougeL_for_task121_zest_question_rewriting": 43.1137, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 8.9143, + "eval_rougeL_for_task1344_rte_textual_entailment": 48.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8408, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.5105, + "eval_rougeL_for_task1358_xlsum_title_generation": 30.1906, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 50.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 49.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 49.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 2.0, + "eval_rougeL_for_task1407_dart_data_to_text": 24.8691, + "eval_rougeL_for_task1409_dart_data_to_text": 36.5521, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 42.1408, + "eval_rougeL_for_task1439_doqa_answerability_classification": 57.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 31.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 31.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 27.8238, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 86.1001, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.906, + "eval_rougeL_for_task1586_scifact_title_generation": 25.691, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.5361, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 80.0, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.6578, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 51.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 57.2376, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 48.0, + "eval_rougeL_for_task1659_billsum_title_generation": 29.2856, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 65.4333, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.1058, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task219_rocstories_title_generation": 18.693, + "eval_rougeL_for_task220_rocstories_title_generation": 46.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 44.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 46.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 50.0238, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 30.4529, + "eval_rougeL_for_task288_gigaword_title_generation": 23.4207, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 57.8222, + "eval_rougeL_for_task329_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 44.1667, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.0614, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 31.8333, + "eval_rougeL_for_task402_grailqa_question_rewriting": 58.825, + "eval_rougeL_for_task418_persent_title_generation": 21.343, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.1658, + "eval_rougeL_for_task500_scruples_title_generation": 12.7724, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 31.9486, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 50.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 22.5908, + "eval_rougeL_for_task602_wikitext_title_generation": 13.4229, + "eval_rougeL_for_task613_liar_keyword_tagging": 31.0, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 22.9333, + "eval_rougeL_for_task619_ohsumed_title_generation": 32.5641, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.4167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 49.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 32.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 50.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.1476, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 11.4667, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 79.283, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.2469, + "eval_rougeL_for_task677_ollie_data_to_text": 18.5785, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 54.0, + "eval_rougeL_for_task743_eurlex_title_generation": 25.8366, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.2026, + "eval_rougeL_for_task769_qed_title_generation": 85.3186, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 54.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 43.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 39.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 53.719, + "eval_rougeL_for_task892_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 35.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 49.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.4109, + "eval_rougeL_for_task970_sherliic_textual_entailment": 62.0, + "eval_rougeL_for_textual_entailment": 43.5833, + "eval_rougeL_for_title_generation": 27.6702, + "eval_rougeL_for_word_analogy": 21.375, + "eval_runtime": 413.5195, + "eval_samples_per_second": 28.802, + "eval_steps_per_second": 0.902, + "step": 7500 + }, + { + "epoch": 1.83, + "learning_rate": 5e-05, + "loss": 1.327, + "step": 8000 + }, + { + "epoch": 1.83, + "eval_exact_match": 26.4652, + "eval_exact_match_for_answerability_classification": 51.4615, + "eval_exact_match_for_cause_effect_classification": 36.1429, + "eval_exact_match_for_coreference_resolution": 33.7857, + "eval_exact_match_for_data_to_text": 0.4843, + "eval_exact_match_for_dialogue_act_recognition": 36.2857, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 34.0, + "eval_exact_match_for_overlap_extraction": 13.5, + "eval_exact_match_for_question_rewriting": 1.1818, + "eval_exact_match_for_task020_mctaco_answerability_classification": 51.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 36.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 0.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 23.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 27.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 14.0, + "eval_exact_match_for_task1153_bard_word_analogy": 8.0, + "eval_exact_match_for_task1154_bard_word_analogy": 12.0, + "eval_exact_match_for_task1155_bard_word_analogy": 59.0, + "eval_exact_match_for_task1156_bard_word_analogy": 21.0, + "eval_exact_match_for_task1157_bard_word_analogy": 32.0, + "eval_exact_match_for_task1158_bard_word_analogy": 13.0, + "eval_exact_match_for_task1159_bard_word_analogy": 15.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 2.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 50.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 28.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 34.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 39.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 43.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 53.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 17.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 48.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 4.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 14.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 3.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 28.0, + "eval_exact_match_for_task219_rocstories_title_generation": 3.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 52.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 37.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 17.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 32.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 35.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 50.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 22.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 52.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 2.381, + "eval_exact_match_for_task613_liar_keyword_tagging": 12.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 2.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 31.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 31.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 37.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 88.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 22.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 1.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 51.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 58.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 53.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 42.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 41.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 27.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 37.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 44.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 40.7083, + "eval_exact_match_for_title_generation": 6.9507, + "eval_exact_match_for_word_analogy": 21.75, + "eval_f1": 43.3565, + "eval_f1_for_answerability_classification": 54.0256, + "eval_f1_for_cause_effect_classification": 54.1602, + "eval_f1_for_coreference_resolution": 43.0609, + "eval_f1_for_data_to_text": 42.3349, + "eval_f1_for_dialogue_act_recognition": 39.8571, + "eval_f1_for_grammar_error_correction": 65.2939, + "eval_f1_for_keyword_tagging": 48.2603, + "eval_f1_for_overlap_extraction": 32.0122, + "eval_f1_for_question_rewriting": 70.2662, + "eval_f1_for_task020_mctaco_answerability_classification": 51.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 38.5, + "eval_f1_for_task034_winogrande_question_rewriting": 92.9096, + "eval_f1_for_task035_winogrande_question_rewriting": 86.7521, + "eval_f1_for_task036_qasc_keyword_tagging": 64.4776, + "eval_f1_for_task039_qasc_overlap_extraction": 33.2222, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 50.5573, + "eval_f1_for_task1152_bard_word_analogy": 14.0, + "eval_f1_for_task1153_bard_word_analogy": 10.0, + "eval_f1_for_task1154_bard_word_analogy": 12.0, + "eval_f1_for_task1155_bard_word_analogy": 59.0, + "eval_f1_for_task1156_bard_word_analogy": 21.0, + "eval_f1_for_task1157_bard_word_analogy": 32.0, + "eval_f1_for_task1158_bard_word_analogy": 13.0, + "eval_f1_for_task1159_bard_word_analogy": 15.0, + "eval_f1_for_task1161_coda_19_title_generation": 25.7541, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.7071, + "eval_f1_for_task121_zest_question_rewriting": 49.0666, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.0555, + "eval_f1_for_task1344_rte_textual_entailment": 50.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.2575, + "eval_f1_for_task1356_xlsum_title_generation": 14.4385, + "eval_f1_for_task1358_xlsum_title_generation": 30.925, + "eval_f1_for_task1385_anli_textual_entailment": 28.0, + "eval_f1_for_task1386_anli_textual_entailment": 34.0, + "eval_f1_for_task1387_anli_textual_entailment": 39.0, + "eval_f1_for_task1388_cb_textual_entailment": 43.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 4.0, + "eval_f1_for_task1407_dart_data_to_text": 27.5726, + "eval_f1_for_task1409_dart_data_to_text": 43.5521, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 47.1928, + "eval_f1_for_task1439_doqa_answerability_classification": 53.0, + "eval_f1_for_task1442_doqa_answerability_classification": 55.0, + "eval_f1_for_task1516_imppres_textual_entailment": 17.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_f1_for_task1540_peer_read_title_generation": 26.9787, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.3951, + "eval_f1_for_task1562_zest_question_rewriting": 56.5313, + "eval_f1_for_task1586_scifact_title_generation": 28.3706, + "eval_f1_for_task1598_nyc_data_to_text": 40.1024, + "eval_f1_for_task1612_sick_textual_entailment": 33.0, + "eval_f1_for_task1615_sick_textual_entailment": 48.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.3354, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_f1_for_task1631_open_pi_data_to_text": 69.8499, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_f1_for_task1659_billsum_title_generation": 31.0507, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 62.0952, + "eval_f1_for_task1728_web_nlg_data_to_text": 46.8675, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 34.0, + "eval_f1_for_task202_multinli_textual_entailment": 28.0, + "eval_f1_for_task219_rocstories_title_generation": 15.5965, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 50.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 45.1667, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 30.8021, + "eval_f1_for_task288_gigaword_title_generation": 24.2263, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 56.7857, + "eval_f1_for_task329_gap_coreference_resolution": 32.0, + "eval_f1_for_task330_gap_coreference_resolution": 41.319, + "eval_f1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.9957, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.1667, + "eval_f1_for_task402_grailqa_question_rewriting": 73.201, + "eval_f1_for_task418_persent_title_generation": 18.4927, + "eval_f1_for_task442_com_qa_question_rewriting": 66.5851, + "eval_f1_for_task500_scruples_title_generation": 11.0151, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 32.0188, + "eval_f1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 18.0675, + "eval_f1_for_task602_wikitext_title_generation": 12.0162, + "eval_f1_for_task613_liar_keyword_tagging": 13.3333, + "eval_f1_for_task614_glucose_cause_effect_classification": 25.4588, + "eval_f1_for_task619_ohsumed_title_generation": 34.5246, + "eval_f1_for_task620_ohsumed_keyword_tagging": 37.7333, + "eval_f1_for_task623_ohsumed_keyword_tagging": 31.0, + "eval_f1_for_task640_e_snli_textual_entailment": 31.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 37.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.7571, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 31.1, + "eval_f1_for_task670_ambigqa_question_rewriting": 81.5062, + "eval_f1_for_task671_ambigqa_question_rewriting": 68.0764, + "eval_f1_for_task677_ollie_data_to_text": 18.9273, + "eval_f1_for_task738_perspectrum_textual_entailment": 51.0, + "eval_f1_for_task743_eurlex_title_generation": 29.0067, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.131, + "eval_f1_for_task769_qed_title_generation": 78.3118, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 53.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 42.0, + "eval_f1_for_task891_gap_coreference_resolution": 49.719, + "eval_f1_for_task892_gap_coreference_resolution": 27.0, + "eval_f1_for_task893_gap_coreference_resolution": 37.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 44.0, + "eval_f1_for_task957_e2e_data_to_text": 51.4429, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 40.7083, + "eval_f1_for_title_generation": 27.4062, + "eval_f1_for_word_analogy": 22.0, + "eval_gen_len": 10.6055, + "eval_global_step": 8000, + "eval_loss": 1.498915672302246, + "eval_rouge1": 44.8903, + "eval_rouge1_for_answerability_classification": 54.0256, + "eval_rouge1_for_cause_effect_classification": 54.3871, + "eval_rouge1_for_coreference_resolution": 43.8051, + "eval_rouge1_for_data_to_text": 46.3895, + "eval_rouge1_for_dialogue_act_recognition": 42.2619, + "eval_rouge1_for_grammar_error_correction": 68.8214, + "eval_rouge1_for_keyword_tagging": 52.5885, + "eval_rouge1_for_overlap_extraction": 34.47, + "eval_rouge1_for_question_rewriting": 71.7915, + "eval_rouge1_for_task020_mctaco_answerability_classification": 51.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 41.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.9868, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.453, + "eval_rouge1_for_task036_qasc_keyword_tagging": 67.6853, + "eval_rouge1_for_task039_qasc_overlap_extraction": 37.0556, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.3615, + "eval_rouge1_for_task1152_bard_word_analogy": 14.0, + "eval_rouge1_for_task1153_bard_word_analogy": 10.0, + "eval_rouge1_for_task1154_bard_word_analogy": 12.0, + "eval_rouge1_for_task1155_bard_word_analogy": 59.0, + "eval_rouge1_for_task1156_bard_word_analogy": 21.0, + "eval_rouge1_for_task1157_bard_word_analogy": 32.0, + "eval_rouge1_for_task1158_bard_word_analogy": 13.0, + "eval_rouge1_for_task1159_bard_word_analogy": 15.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 29.1746, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.9701, + "eval_rouge1_for_task121_zest_question_rewriting": 51.1709, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.2726, + "eval_rouge1_for_task1344_rte_textual_entailment": 50.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 43.7563, + "eval_rouge1_for_task1356_xlsum_title_generation": 17.293, + "eval_rouge1_for_task1358_xlsum_title_generation": 34.6466, + "eval_rouge1_for_task1385_anli_textual_entailment": 28.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 34.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 39.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 43.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 7.5, + "eval_rouge1_for_task1407_dart_data_to_text": 29.5183, + "eval_rouge1_for_task1409_dart_data_to_text": 44.1107, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 50.9896, + "eval_rouge1_for_task1439_doqa_answerability_classification": 53.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 17.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 29.1137, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6531, + "eval_rouge1_for_task1562_zest_question_rewriting": 59.481, + "eval_rouge1_for_task1586_scifact_title_generation": 31.808, + "eval_rouge1_for_task1598_nyc_data_to_text": 42.5038, + "eval_rouge1_for_task1612_sick_textual_entailment": 33.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 82.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.6082, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 70.9261, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1659_billsum_title_generation": 32.6681, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 62.0952, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 57.899, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 28.0, + "eval_rouge1_for_task219_rocstories_title_generation": 19.5156, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 45.2857, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 31.8844, + "eval_rouge1_for_task288_gigaword_title_generation": 26.9623, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 58.6857, + "eval_rouge1_for_task329_gap_coreference_resolution": 32.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 41.7524, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 34.3584, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 34.5, + "eval_rouge1_for_task402_grailqa_question_rewriting": 75.1782, + "eval_rouge1_for_task418_persent_title_generation": 21.2661, + "eval_rouge1_for_task442_com_qa_question_rewriting": 70.2474, + "eval_rouge1_for_task500_scruples_title_generation": 12.5033, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 32.113, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 20.1144, + "eval_rouge1_for_task602_wikitext_title_generation": 12.8956, + "eval_rouge1_for_task613_liar_keyword_tagging": 27.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 26.6846, + "eval_rouge1_for_task619_ohsumed_title_generation": 36.588, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 41.3333, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 31.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 31.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 37.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 95.2571, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 31.0, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 82.1543, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 68.7006, + "eval_rouge1_for_task677_ollie_data_to_text": 22.0078, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 51.0, + "eval_rouge1_for_task743_eurlex_title_generation": 30.9513, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.3249, + "eval_rouge1_for_task769_qed_title_generation": 78.3935, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 53.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 42.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 49.9524, + "eval_rouge1_for_task892_gap_coreference_resolution": 27.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 37.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 44.0, + "eval_rouge1_for_task957_e2e_data_to_text": 51.986, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 42.1528, + "eval_rouge1_for_title_generation": 29.3843, + "eval_rouge1_for_word_analogy": 22.0, + "eval_rougeL": 43.5627, + "eval_rougeL_for_answerability_classification": 54.0256, + "eval_rougeL_for_cause_effect_classification": 53.7869, + "eval_rougeL_for_coreference_resolution": 43.7813, + "eval_rougeL_for_data_to_text": 38.453, + "eval_rougeL_for_dialogue_act_recognition": 42.2619, + "eval_rougeL_for_grammar_error_correction": 67.2267, + "eval_rougeL_for_keyword_tagging": 51.8383, + "eval_rougeL_for_overlap_extraction": 33.7687, + "eval_rougeL_for_question_rewriting": 68.3758, + "eval_rougeL_for_task020_mctaco_answerability_classification": 51.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 41.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.9868, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.9639, + "eval_rougeL_for_task036_qasc_keyword_tagging": 65.7511, + "eval_rougeL_for_task039_qasc_overlap_extraction": 37.0556, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.2046, + "eval_rougeL_for_task1152_bard_word_analogy": 14.0, + "eval_rougeL_for_task1153_bard_word_analogy": 10.0, + "eval_rougeL_for_task1154_bard_word_analogy": 12.0, + "eval_rougeL_for_task1155_bard_word_analogy": 59.0, + "eval_rougeL_for_task1156_bard_word_analogy": 21.0, + "eval_rougeL_for_task1157_bard_word_analogy": 32.0, + "eval_rougeL_for_task1158_bard_word_analogy": 13.0, + "eval_rougeL_for_task1159_bard_word_analogy": 15.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 24.7179, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 79.0009, + "eval_rougeL_for_task121_zest_question_rewriting": 45.5726, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.9585, + "eval_rougeL_for_task1344_rte_textual_entailment": 50.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 40.8944, + "eval_rougeL_for_task1356_xlsum_title_generation": 14.3207, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.9641, + "eval_rougeL_for_task1385_anli_textual_entailment": 28.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 34.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 39.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 43.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 53.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 7.5, + "eval_rougeL_for_task1407_dart_data_to_text": 25.4489, + "eval_rougeL_for_task1409_dart_data_to_text": 37.5694, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 48.7041, + "eval_rougeL_for_task1439_doqa_answerability_classification": 53.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 17.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 35.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 48.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 50.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 27.3687, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.7492, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.5419, + "eval_rougeL_for_task1586_scifact_title_generation": 25.7079, + "eval_rougeL_for_task1598_nyc_data_to_text": 31.2938, + "eval_rougeL_for_task1612_sick_textual_entailment": 33.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 82.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.3113, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 55.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 59.2426, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1659_billsum_title_generation": 28.275, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 61.7619, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 48.7233, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 28.0, + "eval_rougeL_for_task219_rocstories_title_generation": 19.3823, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 52.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 45.2857, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 30.4818, + "eval_rougeL_for_task288_gigaword_title_generation": 22.9428, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 58.6857, + "eval_rougeL_for_task329_gap_coreference_resolution": 32.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 41.7524, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 50.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 33.0104, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 34.5, + "eval_rougeL_for_task402_grailqa_question_rewriting": 62.6511, + "eval_rougeL_for_task418_persent_title_generation": 18.6092, + "eval_rougeL_for_task442_com_qa_question_rewriting": 66.2321, + "eval_rougeL_for_task500_scruples_title_generation": 11.4428, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 31.3154, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 52.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 19.4361, + "eval_rougeL_for_task602_wikitext_title_generation": 12.616, + "eval_rougeL_for_task613_liar_keyword_tagging": 27.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 23.831, + "eval_rougeL_for_task619_ohsumed_title_generation": 33.4514, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 39.5167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 31.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 31.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 37.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 95.2571, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 31.0, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 81.1991, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 67.7798, + "eval_rougeL_for_task677_ollie_data_to_text": 18.5454, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 51.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.2185, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.0984, + "eval_rougeL_for_task769_qed_title_generation": 78.3935, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 53.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 42.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 49.9524, + "eval_rougeL_for_task892_gap_coreference_resolution": 27.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 37.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 50.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 51.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 44.0, + "eval_rougeL_for_task957_e2e_data_to_text": 41.7879, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 42.1528, + "eval_rougeL_for_title_generation": 27.0237, + "eval_rougeL_for_word_analogy": 22.0, + "eval_runtime": 430.6007, + "eval_samples_per_second": 27.659, + "eval_steps_per_second": 0.866, + "step": 8000 + }, + { + "epoch": 1.94, + "learning_rate": 5e-05, + "loss": 1.3611, + "step": 8500 + }, + { + "epoch": 1.94, + "eval_exact_match": 25.911, + "eval_exact_match_for_answerability_classification": 51.6154, + "eval_exact_match_for_cause_effect_classification": 36.0, + "eval_exact_match_for_coreference_resolution": 33.3571, + "eval_exact_match_for_data_to_text": 0.2421, + "eval_exact_match_for_dialogue_act_recognition": 35.1429, + "eval_exact_match_for_grammar_error_correction": 7.0, + "eval_exact_match_for_keyword_tagging": 36.0, + "eval_exact_match_for_overlap_extraction": 15.5, + "eval_exact_match_for_question_rewriting": 1.0909, + "eval_exact_match_for_task020_mctaco_answerability_classification": 53.0, + "eval_exact_match_for_task033_winogrande_coreference_resolution": 38.0, + "eval_exact_match_for_task034_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task035_winogrande_question_rewriting": 1.0, + "eval_exact_match_for_task036_qasc_keyword_tagging": 22.0, + "eval_exact_match_for_task039_qasc_overlap_extraction": 31.0, + "eval_exact_match_for_task050_multirc_answerability_classification": 51.0, + "eval_exact_match_for_task102_commongen_data_to_text": 0.0, + "eval_exact_match_for_task1152_bard_word_analogy": 14.0, + "eval_exact_match_for_task1153_bard_word_analogy": 6.0, + "eval_exact_match_for_task1154_bard_word_analogy": 13.0, + "eval_exact_match_for_task1155_bard_word_analogy": 54.0, + "eval_exact_match_for_task1156_bard_word_analogy": 21.0, + "eval_exact_match_for_task1157_bard_word_analogy": 33.0, + "eval_exact_match_for_task1158_bard_word_analogy": 9.0, + "eval_exact_match_for_task1159_bard_word_analogy": 15.0, + "eval_exact_match_for_task1161_coda_19_title_generation": 0.0, + "eval_exact_match_for_task1195_disfl_qa_question_rewriting": 1.0, + "eval_exact_match_for_task121_zest_question_rewriting": 0.0, + "eval_exact_match_for_task133_winowhy_coreference_resolution": 50.0, + "eval_exact_match_for_task1342_amazon_us_reviews_title_generation": 0.0, + "eval_exact_match_for_task1344_rte_textual_entailment": 57.0, + "eval_exact_match_for_task1345_qqp_question_rewriting": 0.0, + "eval_exact_match_for_task1356_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1358_xlsum_title_generation": 0.0, + "eval_exact_match_for_task1385_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1386_anli_textual_entailment": 33.0, + "eval_exact_match_for_task1387_anli_textual_entailment": 32.0, + "eval_exact_match_for_task1388_cb_textual_entailment": 40.0, + "eval_exact_match_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_exact_match_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_exact_match_for_task1393_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_exact_match_for_task1407_dart_data_to_text": 0.0, + "eval_exact_match_for_task1409_dart_data_to_text": 0.0, + "eval_exact_match_for_task1415_youtube_caption_corrections_grammar_error_correction": 0.0, + "eval_exact_match_for_task1439_doqa_answerability_classification": 51.0, + "eval_exact_match_for_task1442_doqa_answerability_classification": 55.0, + "eval_exact_match_for_task1516_imppres_textual_entailment": 21.0, + "eval_exact_match_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_exact_match_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_exact_match_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_exact_match_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_exact_match_for_task1540_peer_read_title_generation": 0.0, + "eval_exact_match_for_task1554_scitail_textual_entailment": 50.0, + "eval_exact_match_for_task1557_jfleg_grammar_error_correction": 14.0, + "eval_exact_match_for_task1562_zest_question_rewriting": 0.0, + "eval_exact_match_for_task1586_scifact_title_generation": 0.0, + "eval_exact_match_for_task1598_nyc_data_to_text": 0.0, + "eval_exact_match_for_task1612_sick_textual_entailment": 36.0, + "eval_exact_match_for_task1615_sick_textual_entailment": 33.0, + "eval_exact_match_for_task1622_disfl_qa_question_rewriting": 2.0, + "eval_exact_match_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_exact_match_for_task1631_open_pi_data_to_text": 0.0, + "eval_exact_match_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_exact_match_for_task1659_billsum_title_generation": 2.0, + "eval_exact_match_for_task1664_wino_bias_coreference_resolution": 23.0, + "eval_exact_match_for_task1728_web_nlg_data_to_text": 1.0, + "eval_exact_match_for_task190_snli_textual_entailment": 50.0, + "eval_exact_match_for_task199_multinli_textual_entailment": 50.0, + "eval_exact_match_for_task200_multinli_textual_entailment": 34.0, + "eval_exact_match_for_task201_multinli_textual_entailment": 31.0, + "eval_exact_match_for_task202_multinli_textual_entailment": 33.0, + "eval_exact_match_for_task219_rocstories_title_generation": 2.0, + "eval_exact_match_for_task220_rocstories_title_generation": 50.0, + "eval_exact_match_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_exact_match_for_task232_iirc_answerability_classification": 50.0, + "eval_exact_match_for_task233_iirc_answerability_classification": 49.0, + "eval_exact_match_for_task242_tweetqa_answerability_classification": 55.0, + "eval_exact_match_for_task249_enhanced_wsc_coreference_resolution": 30.0, + "eval_exact_match_for_task281_points_of_correspondence_overlap_extraction": 0.0, + "eval_exact_match_for_task288_gigaword_title_generation": 0.0, + "eval_exact_match_for_task290_tellmewhy_answerability_classification": 50.0, + "eval_exact_match_for_task304_numeric_fused_head_coreference_resolution": 12.0, + "eval_exact_match_for_task329_gap_coreference_resolution": 33.0, + "eval_exact_match_for_task330_gap_coreference_resolution": 42.0, + "eval_exact_match_for_task349_squad2.0_answerability_classification": 41.0, + "eval_exact_match_for_task362_spolin_dialogue_act_recognition": 50.0, + "eval_exact_match_for_task391_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task392_cod3s_cause_effect_classification": 50.0, + "eval_exact_match_for_task393_cod3s_cause_effect_classification": 0.0, + "eval_exact_match_for_task401_numeric_fused_head_coreference_resolution": 24.0, + "eval_exact_match_for_task402_grailqa_question_rewriting": 5.0, + "eval_exact_match_for_task418_persent_title_generation": 0.0, + "eval_exact_match_for_task442_com_qa_question_rewriting": 2.0, + "eval_exact_match_for_task500_scruples_title_generation": 0.0, + "eval_exact_match_for_task510_reddit_tifu_dataset_title_generation": 2.0, + "eval_exact_match_for_task520_aquamuse_answerability_classification": 62.0, + "eval_exact_match_for_task569_recipe_nlg_title_generation": 2.0, + "eval_exact_match_for_task602_wikitext_title_generation": 3.5714, + "eval_exact_match_for_task613_liar_keyword_tagging": 13.0, + "eval_exact_match_for_task614_glucose_cause_effect_classification": 0.0, + "eval_exact_match_for_task619_ohsumed_title_generation": 1.0, + "eval_exact_match_for_task620_ohsumed_keyword_tagging": 16.0, + "eval_exact_match_for_task623_ohsumed_keyword_tagging": 43.0, + "eval_exact_match_for_task640_e_snli_textual_entailment": 24.0, + "eval_exact_match_for_task641_e_snli_textual_entailment": 33.0, + "eval_exact_match_for_task642_e_snli_textual_entailment": 46.0, + "eval_exact_match_for_task645_wiki_auto_all_data_keyword_tagging": 86.0, + "eval_exact_match_for_task648_winograd_wsc_coreference_resolution": 16.0, + "eval_exact_match_for_task670_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task671_ambigqa_question_rewriting": 0.0, + "eval_exact_match_for_task677_ollie_data_to_text": 0.0, + "eval_exact_match_for_task738_perspectrum_textual_entailment": 50.0, + "eval_exact_match_for_task743_eurlex_title_generation": 1.0, + "eval_exact_match_for_task760_msr_sqa_data_to_text": 0.0, + "eval_exact_match_for_task769_qed_title_generation": 60.0, + "eval_exact_match_for_task827_copa_cause_effect_classification": 50.0, + "eval_exact_match_for_task828_copa_cause_effect_classification": 52.0, + "eval_exact_match_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_exact_match_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_exact_match_for_task890_gwsd_textual_entailment": 41.0, + "eval_exact_match_for_task891_gap_coreference_resolution": 44.0, + "eval_exact_match_for_task892_gap_coreference_resolution": 19.0, + "eval_exact_match_for_task893_gap_coreference_resolution": 30.0, + "eval_exact_match_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_exact_match_for_task936_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_exact_match_for_task937_defeasible_nli_atomic_textual_entailment": 3.0, + "eval_exact_match_for_task957_e2e_data_to_text": 1.0, + "eval_exact_match_for_task970_sherliic_textual_entailment": 50.0, + "eval_exact_match_for_textual_entailment": 38.4583, + "eval_exact_match_for_title_generation": 6.8946, + "eval_exact_match_for_word_analogy": 20.625, + "eval_f1": 42.5919, + "eval_f1_for_answerability_classification": 54.1795, + "eval_f1_for_cause_effect_classification": 53.748, + "eval_f1_for_coreference_resolution": 42.1073, + "eval_f1_for_data_to_text": 40.8851, + "eval_f1_for_dialogue_act_recognition": 38.7143, + "eval_f1_for_grammar_error_correction": 60.9705, + "eval_f1_for_keyword_tagging": 49.2305, + "eval_f1_for_overlap_extraction": 32.6724, + "eval_f1_for_question_rewriting": 69.5588, + "eval_f1_for_task020_mctaco_answerability_classification": 53.0, + "eval_f1_for_task033_winogrande_coreference_resolution": 40.0, + "eval_f1_for_task034_winogrande_question_rewriting": 92.9596, + "eval_f1_for_task035_winogrande_question_rewriting": 86.7218, + "eval_f1_for_task036_qasc_keyword_tagging": 61.905, + "eval_f1_for_task039_qasc_overlap_extraction": 37.3333, + "eval_f1_for_task050_multirc_answerability_classification": 51.0, + "eval_f1_for_task102_commongen_data_to_text": 50.164, + "eval_f1_for_task1152_bard_word_analogy": 14.0, + "eval_f1_for_task1153_bard_word_analogy": 12.0, + "eval_f1_for_task1154_bard_word_analogy": 13.0, + "eval_f1_for_task1155_bard_word_analogy": 54.0, + "eval_f1_for_task1156_bard_word_analogy": 21.0, + "eval_f1_for_task1157_bard_word_analogy": 33.0, + "eval_f1_for_task1158_bard_word_analogy": 9.0, + "eval_f1_for_task1159_bard_word_analogy": 15.0, + "eval_f1_for_task1161_coda_19_title_generation": 26.6797, + "eval_f1_for_task1195_disfl_qa_question_rewriting": 79.251, + "eval_f1_for_task121_zest_question_rewriting": 50.0335, + "eval_f1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_f1_for_task1342_amazon_us_reviews_title_generation": 10.236, + "eval_f1_for_task1344_rte_textual_entailment": 57.0, + "eval_f1_for_task1345_qqp_question_rewriting": 40.6841, + "eval_f1_for_task1356_xlsum_title_generation": 14.3092, + "eval_f1_for_task1358_xlsum_title_generation": 30.2159, + "eval_f1_for_task1385_anli_textual_entailment": 33.0, + "eval_f1_for_task1386_anli_textual_entailment": 33.0, + "eval_f1_for_task1387_anli_textual_entailment": 32.0, + "eval_f1_for_task1388_cb_textual_entailment": 40.0, + "eval_f1_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_f1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_f1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_f1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_f1_for_task1407_dart_data_to_text": 24.1391, + "eval_f1_for_task1409_dart_data_to_text": 44.4623, + "eval_f1_for_task1415_youtube_caption_corrections_grammar_error_correction": 38.5381, + "eval_f1_for_task1439_doqa_answerability_classification": 51.0, + "eval_f1_for_task1442_doqa_answerability_classification": 55.0, + "eval_f1_for_task1516_imppres_textual_entailment": 21.0, + "eval_f1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_f1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_f1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_f1_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_f1_for_task1540_peer_read_title_generation": 29.6123, + "eval_f1_for_task1554_scitail_textual_entailment": 50.0, + "eval_f1_for_task1557_jfleg_grammar_error_correction": 83.4029, + "eval_f1_for_task1562_zest_question_rewriting": 56.1444, + "eval_f1_for_task1586_scifact_title_generation": 29.8019, + "eval_f1_for_task1598_nyc_data_to_text": 37.9641, + "eval_f1_for_task1612_sick_textual_entailment": 36.0, + "eval_f1_for_task1615_sick_textual_entailment": 33.0, + "eval_f1_for_task1622_disfl_qa_question_rewriting": 78.0414, + "eval_f1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_f1_for_task1631_open_pi_data_to_text": 69.7956, + "eval_f1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_f1_for_task1659_billsum_title_generation": 29.9413, + "eval_f1_for_task1664_wino_bias_coreference_resolution": 69.3381, + "eval_f1_for_task1728_web_nlg_data_to_text": 46.6191, + "eval_f1_for_task190_snli_textual_entailment": 50.0, + "eval_f1_for_task199_multinli_textual_entailment": 50.0, + "eval_f1_for_task200_multinli_textual_entailment": 34.0, + "eval_f1_for_task201_multinli_textual_entailment": 31.0, + "eval_f1_for_task202_multinli_textual_entailment": 33.0, + "eval_f1_for_task219_rocstories_title_generation": 14.7276, + "eval_f1_for_task220_rocstories_title_generation": 50.0, + "eval_f1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_f1_for_task232_iirc_answerability_classification": 50.0, + "eval_f1_for_task233_iirc_answerability_classification": 49.0, + "eval_f1_for_task242_tweetqa_answerability_classification": 55.0, + "eval_f1_for_task249_enhanced_wsc_coreference_resolution": 36.5111, + "eval_f1_for_task281_points_of_correspondence_overlap_extraction": 28.0115, + "eval_f1_for_task288_gigaword_title_generation": 24.3427, + "eval_f1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_f1_for_task304_numeric_fused_head_coreference_resolution": 54.027, + "eval_f1_for_task329_gap_coreference_resolution": 33.0, + "eval_f1_for_task330_gap_coreference_resolution": 48.2, + "eval_f1_for_task349_squad2.0_answerability_classification": 41.0, + "eval_f1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_f1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_f1_for_task393_cod3s_cause_effect_classification": 33.7225, + "eval_f1_for_task401_numeric_fused_head_coreference_resolution": 29.0, + "eval_f1_for_task402_grailqa_question_rewriting": 68.5476, + "eval_f1_for_task418_persent_title_generation": 18.4205, + "eval_f1_for_task442_com_qa_question_rewriting": 68.9022, + "eval_f1_for_task500_scruples_title_generation": 11.6424, + "eval_f1_for_task510_reddit_tifu_dataset_title_generation": 33.1563, + "eval_f1_for_task520_aquamuse_answerability_classification": 62.0, + "eval_f1_for_task569_recipe_nlg_title_generation": 17.8944, + "eval_f1_for_task602_wikitext_title_generation": 10.9844, + "eval_f1_for_task613_liar_keyword_tagging": 13.6667, + "eval_f1_for_task614_glucose_cause_effect_classification": 23.8464, + "eval_f1_for_task619_ohsumed_title_generation": 37.4578, + "eval_f1_for_task620_ohsumed_keyword_tagging": 33.4667, + "eval_f1_for_task623_ohsumed_keyword_tagging": 43.0, + "eval_f1_for_task640_e_snli_textual_entailment": 24.0, + "eval_f1_for_task641_e_snli_textual_entailment": 33.0, + "eval_f1_for_task642_e_snli_textual_entailment": 46.0, + "eval_f1_for_task645_wiki_auto_all_data_keyword_tagging": 94.1143, + "eval_f1_for_task648_winograd_wsc_coreference_resolution": 22.2073, + "eval_f1_for_task670_ambigqa_question_rewriting": 78.8428, + "eval_f1_for_task671_ambigqa_question_rewriting": 65.0181, + "eval_f1_for_task677_ollie_data_to_text": 14.7655, + "eval_f1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_f1_for_task743_eurlex_title_generation": 30.4127, + "eval_f1_for_task760_msr_sqa_data_to_text": 3.6811, + "eval_f1_for_task769_qed_title_generation": 83.2441, + "eval_f1_for_task827_copa_cause_effect_classification": 50.0, + "eval_f1_for_task828_copa_cause_effect_classification": 52.0, + "eval_f1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_f1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 20.0, + "eval_f1_for_task890_gwsd_textual_entailment": 41.0, + "eval_f1_for_task891_gap_coreference_resolution": 52.219, + "eval_f1_for_task892_gap_coreference_resolution": 19.0, + "eval_f1_for_task893_gap_coreference_resolution": 30.0, + "eval_f1_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_f1_for_task936_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_f1_for_task937_defeasible_nli_atomic_textual_entailment": 3.0, + "eval_f1_for_task957_e2e_data_to_text": 48.8445, + "eval_f1_for_task970_sherliic_textual_entailment": 50.0, + "eval_f1_for_textual_entailment": 38.4583, + "eval_f1_for_title_generation": 28.101, + "eval_f1_for_word_analogy": 21.375, + "eval_gen_len": 10.9452, + "eval_global_step": 8500, + "eval_loss": 1.4904685020446777, + "eval_rouge1": 44.1689, + "eval_rouge1_for_answerability_classification": 54.1795, + "eval_rouge1_for_cause_effect_classification": 53.9649, + "eval_rouge1_for_coreference_resolution": 42.9874, + "eval_rouge1_for_data_to_text": 44.7002, + "eval_rouge1_for_dialogue_act_recognition": 40.619, + "eval_rouge1_for_grammar_error_correction": 65.2353, + "eval_rouge1_for_keyword_tagging": 53.4956, + "eval_rouge1_for_overlap_extraction": 35.0664, + "eval_rouge1_for_question_rewriting": 70.9191, + "eval_rouge1_for_task020_mctaco_answerability_classification": 53.0, + "eval_rouge1_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rouge1_for_task034_winogrande_question_rewriting": 92.9358, + "eval_rouge1_for_task035_winogrande_question_rewriting": 87.3703, + "eval_rouge1_for_task036_qasc_keyword_tagging": 64.6305, + "eval_rouge1_for_task039_qasc_overlap_extraction": 41.1667, + "eval_rouge1_for_task050_multirc_answerability_classification": 51.0, + "eval_rouge1_for_task102_commongen_data_to_text": 63.5694, + "eval_rouge1_for_task1152_bard_word_analogy": 14.0, + "eval_rouge1_for_task1153_bard_word_analogy": 12.0, + "eval_rouge1_for_task1154_bard_word_analogy": 13.0, + "eval_rouge1_for_task1155_bard_word_analogy": 54.0, + "eval_rouge1_for_task1156_bard_word_analogy": 21.0, + "eval_rouge1_for_task1157_bard_word_analogy": 33.0, + "eval_rouge1_for_task1158_bard_word_analogy": 9.0, + "eval_rouge1_for_task1159_bard_word_analogy": 15.0, + "eval_rouge1_for_task1161_coda_19_title_generation": 29.7546, + "eval_rouge1_for_task1195_disfl_qa_question_rewriting": 79.5141, + "eval_rouge1_for_task121_zest_question_rewriting": 51.7434, + "eval_rouge1_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rouge1_for_task1342_amazon_us_reviews_title_generation": 10.4302, + "eval_rouge1_for_task1344_rte_textual_entailment": 57.0, + "eval_rouge1_for_task1345_qqp_question_rewriting": 44.1114, + "eval_rouge1_for_task1356_xlsum_title_generation": 16.2304, + "eval_rouge1_for_task1358_xlsum_title_generation": 33.9472, + "eval_rouge1_for_task1385_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1386_anli_textual_entailment": 33.0, + "eval_rouge1_for_task1387_anli_textual_entailment": 32.0, + "eval_rouge1_for_task1388_cb_textual_entailment": 40.0, + "eval_rouge1_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_rouge1_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rouge1_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_rouge1_for_task1407_dart_data_to_text": 27.8733, + "eval_rouge1_for_task1409_dart_data_to_text": 44.7404, + "eval_rouge1_for_task1415_youtube_caption_corrections_grammar_error_correction": 43.8111, + "eval_rouge1_for_task1439_doqa_answerability_classification": 51.0, + "eval_rouge1_for_task1442_doqa_answerability_classification": 55.0, + "eval_rouge1_for_task1516_imppres_textual_entailment": 21.0, + "eval_rouge1_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rouge1_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_rouge1_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rouge1_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_rouge1_for_task1540_peer_read_title_generation": 32.1405, + "eval_rouge1_for_task1554_scitail_textual_entailment": 50.0, + "eval_rouge1_for_task1557_jfleg_grammar_error_correction": 86.6595, + "eval_rouge1_for_task1562_zest_question_rewriting": 58.7737, + "eval_rouge1_for_task1586_scifact_title_generation": 33.2823, + "eval_rouge1_for_task1598_nyc_data_to_text": 40.1094, + "eval_rouge1_for_task1612_sick_textual_entailment": 36.0, + "eval_rouge1_for_task1615_sick_textual_entailment": 77.6667, + "eval_rouge1_for_task1622_disfl_qa_question_rewriting": 78.3142, + "eval_rouge1_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rouge1_for_task1631_open_pi_data_to_text": 70.755, + "eval_rouge1_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rouge1_for_task1659_billsum_title_generation": 31.5946, + "eval_rouge1_for_task1664_wino_bias_coreference_resolution": 69.3381, + "eval_rouge1_for_task1728_web_nlg_data_to_text": 55.6383, + "eval_rouge1_for_task190_snli_textual_entailment": 50.0, + "eval_rouge1_for_task199_multinli_textual_entailment": 50.0, + "eval_rouge1_for_task200_multinli_textual_entailment": 34.0, + "eval_rouge1_for_task201_multinli_textual_entailment": 31.0, + "eval_rouge1_for_task202_multinli_textual_entailment": 33.0, + "eval_rouge1_for_task219_rocstories_title_generation": 18.1338, + "eval_rouge1_for_task220_rocstories_title_generation": 50.0, + "eval_rouge1_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rouge1_for_task232_iirc_answerability_classification": 50.0, + "eval_rouge1_for_task233_iirc_answerability_classification": 49.0, + "eval_rouge1_for_task242_tweetqa_answerability_classification": 55.0, + "eval_rouge1_for_task249_enhanced_wsc_coreference_resolution": 37.1778, + "eval_rouge1_for_task281_points_of_correspondence_overlap_extraction": 28.966, + "eval_rouge1_for_task288_gigaword_title_generation": 27.1713, + "eval_rouge1_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rouge1_for_task304_numeric_fused_head_coreference_resolution": 56.027, + "eval_rouge1_for_task329_gap_coreference_resolution": 33.0, + "eval_rouge1_for_task330_gap_coreference_resolution": 48.6333, + "eval_rouge1_for_task349_squad2.0_answerability_classification": 41.0, + "eval_rouge1_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rouge1_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rouge1_for_task393_cod3s_cause_effect_classification": 33.9964, + "eval_rouge1_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_rouge1_for_task402_grailqa_question_rewriting": 70.1765, + "eval_rouge1_for_task418_persent_title_generation": 21.1449, + "eval_rouge1_for_task442_com_qa_question_rewriting": 72.0608, + "eval_rouge1_for_task500_scruples_title_generation": 13.4073, + "eval_rouge1_for_task510_reddit_tifu_dataset_title_generation": 33.6183, + "eval_rouge1_for_task520_aquamuse_answerability_classification": 62.0, + "eval_rouge1_for_task569_recipe_nlg_title_generation": 20.1002, + "eval_rouge1_for_task602_wikitext_title_generation": 11.5438, + "eval_rouge1_for_task613_liar_keyword_tagging": 25.6667, + "eval_rouge1_for_task614_glucose_cause_effect_classification": 25.0913, + "eval_rouge1_for_task619_ohsumed_title_generation": 39.8382, + "eval_rouge1_for_task620_ohsumed_keyword_tagging": 39.5667, + "eval_rouge1_for_task623_ohsumed_keyword_tagging": 43.0, + "eval_rouge1_for_task640_e_snli_textual_entailment": 24.0, + "eval_rouge1_for_task641_e_snli_textual_entailment": 33.0, + "eval_rouge1_for_task642_e_snli_textual_entailment": 46.0, + "eval_rouge1_for_task645_wiki_auto_all_data_keyword_tagging": 94.6143, + "eval_rouge1_for_task648_winograd_wsc_coreference_resolution": 22.1956, + "eval_rouge1_for_task670_ambigqa_question_rewriting": 79.4073, + "eval_rouge1_for_task671_ambigqa_question_rewriting": 65.7033, + "eval_rouge1_for_task677_ollie_data_to_text": 16.6649, + "eval_rouge1_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rouge1_for_task743_eurlex_title_generation": 32.4256, + "eval_rouge1_for_task760_msr_sqa_data_to_text": 3.9965, + "eval_rouge1_for_task769_qed_title_generation": 83.2783, + "eval_rouge1_for_task827_copa_cause_effect_classification": 50.0, + "eval_rouge1_for_task828_copa_cause_effect_classification": 52.0, + "eval_rouge1_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_rouge1_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rouge1_for_task890_gwsd_textual_entailment": 41.0, + "eval_rouge1_for_task891_gap_coreference_resolution": 52.4524, + "eval_rouge1_for_task892_gap_coreference_resolution": 19.0, + "eval_rouge1_for_task893_gap_coreference_resolution": 30.0, + "eval_rouge1_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rouge1_for_task936_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rouge1_for_task937_defeasible_nli_atomic_textual_entailment": 3.0, + "eval_rouge1_for_task957_e2e_data_to_text": 48.8337, + "eval_rouge1_for_task970_sherliic_textual_entailment": 50.0, + "eval_rouge1_for_textual_entailment": 40.3194, + "eval_rouge1_for_title_generation": 30.0557, + "eval_rouge1_for_word_analogy": 21.375, + "eval_rougeL": 42.9191, + "eval_rougeL_for_answerability_classification": 54.1795, + "eval_rougeL_for_cause_effect_classification": 53.369, + "eval_rougeL_for_coreference_resolution": 42.964, + "eval_rougeL_for_data_to_text": 38.0414, + "eval_rougeL_for_dialogue_act_recognition": 40.619, + "eval_rougeL_for_grammar_error_correction": 63.5615, + "eval_rougeL_for_keyword_tagging": 52.6725, + "eval_rougeL_for_overlap_extraction": 34.1829, + "eval_rougeL_for_question_rewriting": 67.6783, + "eval_rougeL_for_task020_mctaco_answerability_classification": 53.0, + "eval_rougeL_for_task033_winogrande_coreference_resolution": 42.0, + "eval_rougeL_for_task034_winogrande_question_rewriting": 92.9358, + "eval_rougeL_for_task035_winogrande_question_rewriting": 86.7649, + "eval_rougeL_for_task036_qasc_keyword_tagging": 62.1648, + "eval_rougeL_for_task039_qasc_overlap_extraction": 41.1667, + "eval_rougeL_for_task050_multirc_answerability_classification": 51.0, + "eval_rougeL_for_task102_commongen_data_to_text": 54.4793, + "eval_rougeL_for_task1152_bard_word_analogy": 14.0, + "eval_rougeL_for_task1153_bard_word_analogy": 12.0, + "eval_rougeL_for_task1154_bard_word_analogy": 13.0, + "eval_rougeL_for_task1155_bard_word_analogy": 54.0, + "eval_rougeL_for_task1156_bard_word_analogy": 21.0, + "eval_rougeL_for_task1157_bard_word_analogy": 33.0, + "eval_rougeL_for_task1158_bard_word_analogy": 9.0, + "eval_rougeL_for_task1159_bard_word_analogy": 15.0, + "eval_rougeL_for_task1161_coda_19_title_generation": 25.1145, + "eval_rougeL_for_task1195_disfl_qa_question_rewriting": 78.7028, + "eval_rougeL_for_task121_zest_question_rewriting": 46.3794, + "eval_rougeL_for_task133_winowhy_coreference_resolution": 50.0, + "eval_rougeL_for_task1342_amazon_us_reviews_title_generation": 9.7925, + "eval_rougeL_for_task1344_rte_textual_entailment": 57.0, + "eval_rougeL_for_task1345_qqp_question_rewriting": 41.1743, + "eval_rougeL_for_task1356_xlsum_title_generation": 13.5718, + "eval_rougeL_for_task1358_xlsum_title_generation": 29.102, + "eval_rougeL_for_task1385_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1386_anli_textual_entailment": 33.0, + "eval_rougeL_for_task1387_anli_textual_entailment": 32.0, + "eval_rougeL_for_task1388_cb_textual_entailment": 40.0, + "eval_rougeL_for_task1390_wsc_fiexed_coreference_resolution": 56.0, + "eval_rougeL_for_task1391_winogrande_coreference_resolution": 50.0, + "eval_rougeL_for_task1393_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task1394_meta_woz_dialogue_act_recognition": 3.0, + "eval_rougeL_for_task1407_dart_data_to_text": 25.3191, + "eval_rougeL_for_task1409_dart_data_to_text": 38.3704, + "eval_rougeL_for_task1415_youtube_caption_corrections_grammar_error_correction": 41.3659, + "eval_rougeL_for_task1439_doqa_answerability_classification": 51.0, + "eval_rougeL_for_task1442_doqa_answerability_classification": 55.0, + "eval_rougeL_for_task1516_imppres_textual_entailment": 21.0, + "eval_rougeL_for_task1529_scitailv1.1_textual_entailment": 50.0, + "eval_rougeL_for_task1531_dailydialog_dialogue_act_recognition": 36.0, + "eval_rougeL_for_task1533_dailydialog_dialogue_act_recognition": 49.0, + "eval_rougeL_for_task1534_dailydialog_dialogue_act_recognition": 47.0, + "eval_rougeL_for_task1540_peer_read_title_generation": 29.9446, + "eval_rougeL_for_task1554_scitail_textual_entailment": 50.0, + "eval_rougeL_for_task1557_jfleg_grammar_error_correction": 85.757, + "eval_rougeL_for_task1562_zest_question_rewriting": 51.7396, + "eval_rougeL_for_task1586_scifact_title_generation": 28.1246, + "eval_rougeL_for_task1598_nyc_data_to_text": 32.2284, + "eval_rougeL_for_task1612_sick_textual_entailment": 36.0, + "eval_rougeL_for_task1615_sick_textual_entailment": 77.6667, + "eval_rougeL_for_task1622_disfl_qa_question_rewriting": 77.0802, + "eval_rougeL_for_task1624_disfl_qa_answerability_classification": 49.0, + "eval_rougeL_for_task1631_open_pi_data_to_text": 58.7649, + "eval_rougeL_for_task1640_adverserial_qa_answerability_classification": 54.0, + "eval_rougeL_for_task1659_billsum_title_generation": 26.3813, + "eval_rougeL_for_task1664_wino_bias_coreference_resolution": 69.3381, + "eval_rougeL_for_task1728_web_nlg_data_to_text": 49.2408, + "eval_rougeL_for_task190_snli_textual_entailment": 50.0, + "eval_rougeL_for_task199_multinli_textual_entailment": 50.0, + "eval_rougeL_for_task200_multinli_textual_entailment": 34.0, + "eval_rougeL_for_task201_multinli_textual_entailment": 31.0, + "eval_rougeL_for_task202_multinli_textual_entailment": 33.0, + "eval_rougeL_for_task219_rocstories_title_generation": 17.7783, + "eval_rougeL_for_task220_rocstories_title_generation": 50.0, + "eval_rougeL_for_task226_curated_from_stack_overflow___english._answerability_classification": 51.0, + "eval_rougeL_for_task232_iirc_answerability_classification": 50.0, + "eval_rougeL_for_task233_iirc_answerability_classification": 49.0, + "eval_rougeL_for_task242_tweetqa_answerability_classification": 55.0, + "eval_rougeL_for_task249_enhanced_wsc_coreference_resolution": 36.9556, + "eval_rougeL_for_task281_points_of_correspondence_overlap_extraction": 27.1991, + "eval_rougeL_for_task288_gigaword_title_generation": 23.2403, + "eval_rougeL_for_task290_tellmewhy_answerability_classification": 83.3333, + "eval_rougeL_for_task304_numeric_fused_head_coreference_resolution": 56.027, + "eval_rougeL_for_task329_gap_coreference_resolution": 33.0, + "eval_rougeL_for_task330_gap_coreference_resolution": 48.6333, + "eval_rougeL_for_task349_squad2.0_answerability_classification": 41.0, + "eval_rougeL_for_task362_spolin_dialogue_act_recognition": 75.0, + "eval_rougeL_for_task391_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task392_cod3s_cause_effect_classification": 83.3333, + "eval_rougeL_for_task393_cod3s_cause_effect_classification": 32.931, + "eval_rougeL_for_task401_numeric_fused_head_coreference_resolution": 36.0, + "eval_rougeL_for_task402_grailqa_question_rewriting": 58.8799, + "eval_rougeL_for_task418_persent_title_generation": 18.3443, + "eval_rougeL_for_task442_com_qa_question_rewriting": 67.8179, + "eval_rougeL_for_task500_scruples_title_generation": 12.1831, + "eval_rougeL_for_task510_reddit_tifu_dataset_title_generation": 32.7368, + "eval_rougeL_for_task520_aquamuse_answerability_classification": 62.0, + "eval_rougeL_for_task569_recipe_nlg_title_generation": 19.4113, + "eval_rougeL_for_task602_wikitext_title_generation": 11.4252, + "eval_rougeL_for_task613_liar_keyword_tagging": 25.6667, + "eval_rougeL_for_task614_glucose_cause_effect_classification": 21.9853, + "eval_rougeL_for_task619_ohsumed_title_generation": 36.2886, + "eval_rougeL_for_task620_ohsumed_keyword_tagging": 37.9167, + "eval_rougeL_for_task623_ohsumed_keyword_tagging": 43.0, + "eval_rougeL_for_task640_e_snli_textual_entailment": 24.0, + "eval_rougeL_for_task641_e_snli_textual_entailment": 33.0, + "eval_rougeL_for_task642_e_snli_textual_entailment": 46.0, + "eval_rougeL_for_task645_wiki_auto_all_data_keyword_tagging": 94.6143, + "eval_rougeL_for_task648_winograd_wsc_coreference_resolution": 22.0903, + "eval_rougeL_for_task670_ambigqa_question_rewriting": 78.2705, + "eval_rougeL_for_task671_ambigqa_question_rewriting": 64.7164, + "eval_rougeL_for_task677_ollie_data_to_text": 14.4516, + "eval_rougeL_for_task738_perspectrum_textual_entailment": 50.0, + "eval_rougeL_for_task743_eurlex_title_generation": 26.8231, + "eval_rougeL_for_task760_msr_sqa_data_to_text": 3.4584, + "eval_rougeL_for_task769_qed_title_generation": 83.2783, + "eval_rougeL_for_task827_copa_cause_effect_classification": 50.0, + "eval_rougeL_for_task828_copa_cause_effect_classification": 52.0, + "eval_rougeL_for_task879_schema_guided_dstc8_dialogue_act_recognition": 41.0, + "eval_rougeL_for_task880_schema_guided_dstc8_dialogue_act_recognition": 33.3333, + "eval_rougeL_for_task890_gwsd_textual_entailment": 41.0, + "eval_rougeL_for_task891_gap_coreference_resolution": 52.4524, + "eval_rougeL_for_task892_gap_coreference_resolution": 19.0, + "eval_rougeL_for_task893_gap_coreference_resolution": 30.0, + "eval_rougeL_for_task935_defeasible_nli_atomic_textual_entailment": 46.0, + "eval_rougeL_for_task936_defeasible_nli_atomic_textual_entailment": 47.0, + "eval_rougeL_for_task937_defeasible_nli_atomic_textual_entailment": 3.0, + "eval_rougeL_for_task957_e2e_data_to_text": 40.4682, + "eval_rougeL_for_task970_sherliic_textual_entailment": 50.0, + "eval_rougeL_for_textual_entailment": 40.3194, + "eval_rougeL_for_title_generation": 27.5624, + "eval_rougeL_for_word_analogy": 21.375, + "eval_runtime": 406.8287, + "eval_samples_per_second": 29.275, + "eval_steps_per_second": 0.917, + "step": 8500 + }, + { + "epoch": 2.0, + "step": 8748, + "total_flos": 1.4155487596720947e+17, + "train_loss": 1.4893177034214964, + "train_runtime": 18663.9849, + "train_samples_per_second": 7.498, + "train_steps_per_second": 0.469 + } + ], + "max_steps": 8748, + "num_train_epochs": 2, + "total_flos": 1.4155487596720947e+17, + "trial_name": null, + "trial_params": null +}