File size: 3,034 Bytes
256a159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# Language-track entries, each a two-element list ``[name, metric]``.
# Every name ending in ``_circular`` is listed twice, once with 'acc_origin'
# and once with 'perf_circular'; the remaining names carry a single metric.
# The rows below are flattened in order, so the resulting list keeps the
# per-name grouping (all metrics of one name are adjacent).
compassbench_v1_language_names = [
    [dataset, metric]
    for dataset, metrics in (
        # Disabled entries, kept for reference:
        # ('information_retrieval_en', ('score',)),
        # ('information_retrieval_zh', ('score',)),
        ('intention_recognition_en_circular', ('acc_origin', 'perf_circular')),
        ('intention_recognition_zh_circular', ('acc_origin', 'perf_circular')),
        ('sentiment_analysis_en_circular', ('acc_origin', 'perf_circular')),
        ('sentiment_analysis_zh_circular', ('acc_origin', 'perf_circular')),
        ('translation', ('score',)),
        ('content_critic_en_circular', ('acc_origin', 'perf_circular')),
        ('content_critic_zh_circular', ('acc_origin', 'perf_circular')),
        ('content_summarization_en', ('rouge1',)),
        ('content_summarization_zh', ('rouge1',)),
        ('traditional_cultural_understanding_zh_circular', ('acc_origin', 'perf_circular')),
        ('chinese_semantic_understanding_zh_circular', ('acc_origin', 'perf_circular')),
    )
    for metric in metrics
]

# Summary groups derived from compassbench_v1_language_names.
#
# Two families are defined, each as a zh group, an en group and a combined
# group whose subsets are the names of the two per-language groups:
#   * "acc_1_and_non_mcq":  every entry whose metric is not 'perf_circular'
#   * "perf_4_and_non_mcq": every entry whose metric is not 'acc_origin'
# Language membership is decided by a substring test ('_zh' / '_en') on the
# entry name.
#
# NOTE(review): 'translation' contains neither '_zh' nor '_en', so it is not a
# member of any group below -- confirm that this is intended.
compassbench_v1_language_groups = [
    dict(
        name='language_zh_acc_1_and_non_mcq',
        subsets=[
            [dataset, metric]
            for dataset, metric in compassbench_v1_language_names
            if '_zh' in dataset and metric != 'perf_circular'
        ],
    ),
    dict(
        name='language_en_acc_1_and_non_mcq',
        subsets=[
            [dataset, metric]
            for dataset, metric in compassbench_v1_language_names
            if '_en' in dataset and metric != 'perf_circular'
        ],
    ),
    dict(
        name='language_acc_1_and_non_mcq',
        subsets=['language_zh_acc_1_and_non_mcq', 'language_en_acc_1_and_non_mcq'],
    ),

    dict(
        name='language_zh_perf_4_and_non_mcq',
        subsets=[
            [dataset, metric]
            for dataset, metric in compassbench_v1_language_names
            if '_zh' in dataset and metric != 'acc_origin'
        ],
    ),
    dict(
        name='language_en_perf_4_and_non_mcq',
        subsets=[
            [dataset, metric]
            for dataset, metric in compassbench_v1_language_names
            if '_en' in dataset and metric != 'acc_origin'
        ],
    ),
    dict(
        name='language_perf_4_and_non_mcq',
        subsets=['language_zh_perf_4_and_non_mcq', 'language_en_perf_4_and_non_mcq'],
    ),
]

# Summarizer configuration for the language track.
# 'dataset_abbrs' lists the rows in order: the three "perf_4_and_non_mcq"
# group names first, then individual [name, metric] entries (the
# 'perf_circular' metric for the *_circular names, and the single metric of
# the others). 'summary_groups' passes along the group definitions from
# compassbench_v1_language_groups.
summarizer = {
    'dataset_abbrs': [
        # Group rows: combined, then zh, then en.
        'language_perf_4_and_non_mcq',
        'language_zh_perf_4_and_non_mcq',
        'language_en_perf_4_and_non_mcq',
    ] + [
        # Per-dataset rows, zh listed before en for each task.
        ['intention_recognition_zh_circular', 'perf_circular'],
        ['intention_recognition_en_circular', 'perf_circular'],
        ['sentiment_analysis_zh_circular', 'perf_circular'],
        ['sentiment_analysis_en_circular', 'perf_circular'],
        ['translation', 'score'],
        ['content_critic_zh_circular', 'perf_circular'],
        ['content_critic_en_circular', 'perf_circular'],
        ['content_summarization_zh', 'rouge1'],
        ['content_summarization_en', 'rouge1'],
        ['traditional_cultural_understanding_zh_circular', 'perf_circular'],
        ['chinese_semantic_understanding_zh_circular', 'perf_circular'],
    ],
    'summary_groups': compassbench_v1_language_groups,
}