|
from mmengine.config import read_base

# Imports placed inside `read_base()` bring variables from other config
# modules into this module's namespace. Every name imported here ends in
# `_summary_groups`, which is the suffix the `summarizer` definition in this
# file scans for, so adding a new group import here is enough to have it
# included in `summarizer['summary_groups']`.
with read_base():
    from .groups.agieval import agieval_summary_groups
    from .groups.mmlu import mmlu_summary_groups
    from .groups.cmmlu import cmmlu_summary_groups
    from .groups.ceval import ceval_summary_groups
    from .groups.bbh import bbh_summary_groups
    from .groups.GaokaoBench import GaokaoBench_summary_groups
    from .groups.flores import flores_summary_groups
    from .groups.jigsaw_multilingual import jigsaw_multilingual_summary_groups
    from .groups.tydiqa import tydiqa_summary_groups
    from .groups.xiezhi import xiezhi_summary_groups
|
|
|
# Summarizer configuration.
#
# dataset_abbrs:  ordered list of dataset abbreviations to report. Entries
#                 wrapped in dashes are category headings (presumably shown
#                 as divider rows by whatever consumes this config -- confirm).
# summary_groups: concatenation of every `*_summary_groups` list that is
#                 visible in this module's namespace.
summarizer = dict(
    dataset_abbrs=[
        '--------- 考试 Exam ---------',

        'ceval',
        'agieval',
        'mmlu',
        'GaokaoBench',
        'ARC-c',
        '--------- 语言 Language ---------',

        'WiC',
        'summedits',

        'chid-dev',

        'afqmc-dev',
        'bustm-dev',

        'cluewsc-dev',
        'WSC',
        'winogrande',

        'flores_100',
        '--------- 知识 Knowledge ---------',

        'BoolQ',
        'commonsense_qa',
        'nq',
        'triviaqa',

        '--------- 推理 Reasoning ---------',

        'cmnli',
        'ocnli',
        'ocnli_fc-dev',
        'AX_b',
        'AX_g',
        'CB',
        'RTE',

        'story_cloze',
        'COPA',
        'ReCoRD',
        'hellaswag',
        'piqa',
        'siqa',
        'strategyqa',

        'math',
        'gsm8k',

        'TheoremQA',

        'openai_humaneval',
        'mbpp',

        'bbh',
        '--------- 理解 Understanding ---------',

        'C3',
        'CMRC_dev',
        'DRCD_dev',
        'MultiRC',
        'race-middle',
        'race-high',
        'openbookqa_fact',

        'csl_dev',
        'lcsts',
        'Xsum',

        'eprstmt-dev',
        'lambada',
        'tnews-dev',
    ],
    # `locals()` is the comprehension's outermost iterable, so it is evaluated
    # in the enclosing (module) scope and sees the imported group lists.
    # `sum(..., [])` flattens the list of lists into a single list.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')],
        [],
    ),
)
|
|