# Every (dataset, metric) combination used by the language summary groups.
# Each entry is a two-element list: [dataset name, metric name]. The
# two-element shape matters: the group definitions in this file unpack each
# entry as `name, metric`.
# The '*_circular' datasets appear twice, once with 'acc_origin' and once with
# 'perf_circular'; the remaining datasets appear once with their own metric
# ('score' or 'rouge1').
compassbench_v1_language_names = [
    # Intention recognition
    ['intention_recognition_en_circular', 'acc_origin'],
    ['intention_recognition_en_circular', 'perf_circular'],
    ['intention_recognition_zh_circular', 'acc_origin'],
    ['intention_recognition_zh_circular', 'perf_circular'],
    # Sentiment analysis
    ['sentiment_analysis_en_circular', 'acc_origin'],
    ['sentiment_analysis_en_circular', 'perf_circular'],
    ['sentiment_analysis_zh_circular', 'acc_origin'],
    ['sentiment_analysis_zh_circular', 'perf_circular'],
    # Translation (name carries no '_en' / '_zh' language tag)
    ['translation', 'score'],
    # Content critic
    ['content_critic_en_circular', 'acc_origin'],
    ['content_critic_en_circular', 'perf_circular'],
    ['content_critic_zh_circular', 'acc_origin'],
    ['content_critic_zh_circular', 'perf_circular'],
    # Content summarization
    ['content_summarization_en', 'rouge1'],
    ['content_summarization_zh', 'rouge1'],
    # Chinese-only datasets
    ['traditional_cultural_understanding_zh_circular', 'acc_origin'],
    ['traditional_cultural_understanding_zh_circular', 'perf_circular'],
    ['chinese_semantic_understanding_zh_circular', 'acc_origin'],
    ['chinese_semantic_understanding_zh_circular', 'perf_circular'],
]

# Summary groups built from compassbench_v1_language_names.
#
# Language is decided by a substring test on the dataset name ('_zh' / '_en').
# The metric filter is an exclusion, so metrics other than 'acc_origin' and
# 'perf_circular' (e.g. 'rouge1') land in BOTH the acc_1 and the perf_4 group
# of their language.
#
# NOTE(review): 'translation' contains neither '_zh' nor '_en', so its 'score'
# entry is not part of any group below - confirm this is intended.
compassbench_v1_language_groups = [
    # Everything except 'perf_circular', split by language, then combined.
    {
        'name': 'language_zh_acc_1_and_non_mcq',
        'subsets': [
            [name, metric]
            for name, metric in compassbench_v1_language_names
            if '_zh' in name and metric != 'perf_circular'
        ],
    },
    {
        'name': 'language_en_acc_1_and_non_mcq',
        'subsets': [
            [name, metric]
            for name, metric in compassbench_v1_language_names
            if '_en' in name and metric != 'perf_circular'
        ],
    },
    {
        # Refers to the two groups above by their group names.
        'name': 'language_acc_1_and_non_mcq',
        'subsets': [
            'language_zh_acc_1_and_non_mcq',
            'language_en_acc_1_and_non_mcq',
        ],
    },

    # Everything except 'acc_origin', split by language, then combined.
    {
        'name': 'language_zh_perf_4_and_non_mcq',
        'subsets': [
            [name, metric]
            for name, metric in compassbench_v1_language_names
            if '_zh' in name and metric != 'acc_origin'
        ],
    },
    {
        'name': 'language_en_perf_4_and_non_mcq',
        'subsets': [
            [name, metric]
            for name, metric in compassbench_v1_language_names
            if '_en' in name and metric != 'acc_origin'
        ],
    },
    {
        # Refers to the two groups above by their group names.
        'name': 'language_perf_4_and_non_mcq',
        'subsets': [
            'language_zh_perf_4_and_non_mcq',
            'language_en_perf_4_and_non_mcq',
        ],
    },
]

# Summarizer configuration for the language benchmark.
# `dataset_abbrs` lists the three perf_4 group names first, followed by
# individual [dataset name, metric] entries (zh before en where both exist).
# Only 'perf_circular', 'score' and 'rouge1' entries are listed here; the
# 'acc_origin' entries and the acc_1 groups are not.
summarizer = dict(
    dataset_abbrs=[
        # Aggregated groups (defined in compassbench_v1_language_groups)
        'language_perf_4_and_non_mcq',
        'language_zh_perf_4_and_non_mcq',
        'language_en_perf_4_and_non_mcq',
        # Individual datasets
        ['intention_recognition_zh_circular', 'perf_circular'],
        ['intention_recognition_en_circular', 'perf_circular'],
        ['sentiment_analysis_zh_circular', 'perf_circular'],
        ['sentiment_analysis_en_circular', 'perf_circular'],
        ['translation', 'score'],
        ['content_critic_zh_circular', 'perf_circular'],
        ['content_critic_en_circular', 'perf_circular'],
        ['content_summarization_zh', 'rouge1'],
        ['content_summarization_en', 'rouge1'],
        ['traditional_cultural_understanding_zh_circular', 'perf_circular'],
        ['chinese_semantic_understanding_zh_circular', 'perf_circular'],
    ],
    summary_groups=compassbench_v1_language_groups,
)