TwT-6's picture
Upload 2667 files
256a159 verified
datasets=[
dict(abbr='siqa',
eval_cfg=dict(
evaluator=dict(
type='opencompass.openicl.icl_evaluator.EDAccEvaluator'),
pred_role='BOT'),
infer_cfg=dict(
inferencer=dict(
type='opencompass.openicl.icl_inferencer.GenInferencer'),
prompt_template=dict(
template=dict(
round=[
dict(prompt='{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}\nAnswer:',
role='HUMAN'),
]),
type='opencompass.openicl.icl_prompt_template.PromptTemplate'),
retriever=dict(
type='opencompass.openicl.icl_retriever.ZeroRetriever')),
path='./data/siqa',
reader_cfg=dict(
input_columns=[
'context',
'question',
'answerA',
'answerB',
'answerC',
],
output_column='all_labels',
test_split='validation'),
type='opencompass.datasets.siqaDataset_V2'),
]
models=[
dict(abbr='my_api',
api_key='',
batch_size=8,
max_out_len=100,
max_seq_len=2048,
meta_template=dict(
round=[
dict(api_role='HUMAN',
role='HUMAN'),
dict(api_role='BOT',
generate=True,
role='BOT'),
]),
path='my_api',
run_cfg=dict(
num_gpus=1,
num_procs=1),
type='opencompass.models.my_api.MyAPIModel',
url='http://127.0.0.1:12345/testing'),
]
summarizer=dict(
summary_groups=[
dict(name='agieval-chinese',
subsets=[
'agieval-gaokao-chinese',
'agieval-gaokao-english',
'agieval-gaokao-geography',
'agieval-gaokao-history',
'agieval-gaokao-biology',
'agieval-gaokao-chemistry',
'agieval-gaokao-physics',
'agieval-gaokao-mathqa',
'agieval-logiqa-zh',
'agieval-jec-qa-kd',
'agieval-jec-qa-ca',
'agieval-gaokao-mathcloze',
]),
dict(name='agieval-english',
subsets=[
'agieval-lsat-ar',
'agieval-lsat-lr',
'agieval-lsat-rc',
'agieval-logiqa-en',
'agieval-sat-math',
'agieval-sat-en',
'agieval-sat-en-without-passage',
'agieval-aqua-rat',
'agieval-math',
]),
dict(name='agieval-gaokao',
subsets=[
'agieval-gaokao-chinese',
'agieval-gaokao-english',
'agieval-gaokao-geography',
'agieval-gaokao-history',
'agieval-gaokao-biology',
'agieval-gaokao-chemistry',
'agieval-gaokao-physics',
'agieval-gaokao-mathqa',
'agieval-gaokao-mathcloze',
]),
dict(name='agieval',
subsets=[
'agieval-gaokao-chinese',
'agieval-gaokao-english',
'agieval-gaokao-geography',
'agieval-gaokao-history',
'agieval-gaokao-biology',
'agieval-gaokao-chemistry',
'agieval-gaokao-physics',
'agieval-gaokao-mathqa',
'agieval-logiqa-zh',
'agieval-lsat-ar',
'agieval-lsat-lr',
'agieval-lsat-rc',
'agieval-logiqa-en',
'agieval-sat-math',
'agieval-sat-en',
'agieval-sat-en-without-passage',
'agieval-aqua-rat',
'agieval-jec-qa-kd',
'agieval-jec-qa-ca',
'agieval-gaokao-mathcloze',
'agieval-math',
]),
dict(name='mmlu-humanities',
subsets=[
'lukaemon_mmlu_formal_logic',
'lukaemon_mmlu_high_school_european_history',
'lukaemon_mmlu_high_school_us_history',
'lukaemon_mmlu_high_school_world_history',
'lukaemon_mmlu_international_law',
'lukaemon_mmlu_jurisprudence',
'lukaemon_mmlu_logical_fallacies',
'lukaemon_mmlu_moral_disputes',
'lukaemon_mmlu_moral_scenarios',
'lukaemon_mmlu_philosophy',
'lukaemon_mmlu_prehistory',
'lukaemon_mmlu_professional_law',
'lukaemon_mmlu_world_religions',
]),
dict(name='mmlu-stem',
subsets=[
'lukaemon_mmlu_abstract_algebra',
'lukaemon_mmlu_anatomy',
'lukaemon_mmlu_astronomy',
'lukaemon_mmlu_college_biology',
'lukaemon_mmlu_college_chemistry',
'lukaemon_mmlu_college_computer_science',
'lukaemon_mmlu_college_mathematics',
'lukaemon_mmlu_college_physics',
'lukaemon_mmlu_computer_security',
'lukaemon_mmlu_conceptual_physics',
'lukaemon_mmlu_electrical_engineering',
'lukaemon_mmlu_elementary_mathematics',
'lukaemon_mmlu_high_school_biology',
'lukaemon_mmlu_high_school_chemistry',
'lukaemon_mmlu_high_school_computer_science',
'lukaemon_mmlu_high_school_mathematics',
'lukaemon_mmlu_high_school_physics',
'lukaemon_mmlu_high_school_statistics',
'lukaemon_mmlu_machine_learning',
]),
dict(name='mmlu-social-science',
subsets=[
'lukaemon_mmlu_econometrics',
'lukaemon_mmlu_high_school_geography',
'lukaemon_mmlu_high_school_government_and_politics',
'lukaemon_mmlu_high_school_macroeconomics',
'lukaemon_mmlu_high_school_microeconomics',
'lukaemon_mmlu_high_school_psychology',
'lukaemon_mmlu_human_sexuality',
'lukaemon_mmlu_professional_psychology',
'lukaemon_mmlu_public_relations',
'lukaemon_mmlu_security_studies',
'lukaemon_mmlu_sociology',
'lukaemon_mmlu_us_foreign_policy',
]),
dict(name='mmlu-other',
subsets=[
'lukaemon_mmlu_business_ethics',
'lukaemon_mmlu_clinical_knowledge',
'lukaemon_mmlu_college_medicine',
'lukaemon_mmlu_global_facts',
'lukaemon_mmlu_human_aging',
'lukaemon_mmlu_management',
'lukaemon_mmlu_marketing',
'lukaemon_mmlu_medical_genetics',
'lukaemon_mmlu_miscellaneous',
'lukaemon_mmlu_nutrition',
'lukaemon_mmlu_professional_accounting',
'lukaemon_mmlu_professional_medicine',
'lukaemon_mmlu_virology',
]),
dict(name='mmlu',
subsets=[
'lukaemon_mmlu_formal_logic',
'lukaemon_mmlu_high_school_european_history',
'lukaemon_mmlu_high_school_us_history',
'lukaemon_mmlu_high_school_world_history',
'lukaemon_mmlu_international_law',
'lukaemon_mmlu_jurisprudence',
'lukaemon_mmlu_logical_fallacies',
'lukaemon_mmlu_moral_disputes',
'lukaemon_mmlu_moral_scenarios',
'lukaemon_mmlu_philosophy',
'lukaemon_mmlu_prehistory',
'lukaemon_mmlu_professional_law',
'lukaemon_mmlu_world_religions',
'lukaemon_mmlu_abstract_algebra',
'lukaemon_mmlu_anatomy',
'lukaemon_mmlu_astronomy',
'lukaemon_mmlu_college_biology',
'lukaemon_mmlu_college_chemistry',
'lukaemon_mmlu_college_computer_science',
'lukaemon_mmlu_college_mathematics',
'lukaemon_mmlu_college_physics',
'lukaemon_mmlu_computer_security',
'lukaemon_mmlu_conceptual_physics',
'lukaemon_mmlu_electrical_engineering',
'lukaemon_mmlu_elementary_mathematics',
'lukaemon_mmlu_high_school_biology',
'lukaemon_mmlu_high_school_chemistry',
'lukaemon_mmlu_high_school_computer_science',
'lukaemon_mmlu_high_school_mathematics',
'lukaemon_mmlu_high_school_physics',
'lukaemon_mmlu_high_school_statistics',
'lukaemon_mmlu_machine_learning',
'lukaemon_mmlu_econometrics',
'lukaemon_mmlu_high_school_geography',
'lukaemon_mmlu_high_school_government_and_politics',
'lukaemon_mmlu_high_school_macroeconomics',
'lukaemon_mmlu_high_school_microeconomics',
'lukaemon_mmlu_high_school_psychology',
'lukaemon_mmlu_human_sexuality',
'lukaemon_mmlu_professional_psychology',
'lukaemon_mmlu_public_relations',
'lukaemon_mmlu_security_studies',
'lukaemon_mmlu_sociology',
'lukaemon_mmlu_us_foreign_policy',
'lukaemon_mmlu_business_ethics',
'lukaemon_mmlu_clinical_knowledge',
'lukaemon_mmlu_college_medicine',
'lukaemon_mmlu_global_facts',
'lukaemon_mmlu_human_aging',
'lukaemon_mmlu_management',
'lukaemon_mmlu_marketing',
'lukaemon_mmlu_medical_genetics',
'lukaemon_mmlu_miscellaneous',
'lukaemon_mmlu_nutrition',
'lukaemon_mmlu_professional_accounting',
'lukaemon_mmlu_professional_medicine',
'lukaemon_mmlu_virology',
]),
dict(name='mmlu-weighted',
subsets=[
'lukaemon_mmlu_formal_logic',
'lukaemon_mmlu_high_school_european_history',
'lukaemon_mmlu_high_school_us_history',
'lukaemon_mmlu_high_school_world_history',
'lukaemon_mmlu_international_law',
'lukaemon_mmlu_jurisprudence',
'lukaemon_mmlu_logical_fallacies',
'lukaemon_mmlu_moral_disputes',
'lukaemon_mmlu_moral_scenarios',
'lukaemon_mmlu_philosophy',
'lukaemon_mmlu_prehistory',
'lukaemon_mmlu_professional_law',
'lukaemon_mmlu_world_religions',
'lukaemon_mmlu_abstract_algebra',
'lukaemon_mmlu_anatomy',
'lukaemon_mmlu_astronomy',
'lukaemon_mmlu_college_biology',
'lukaemon_mmlu_college_chemistry',
'lukaemon_mmlu_college_computer_science',
'lukaemon_mmlu_college_mathematics',
'lukaemon_mmlu_college_physics',
'lukaemon_mmlu_computer_security',
'lukaemon_mmlu_conceptual_physics',
'lukaemon_mmlu_electrical_engineering',
'lukaemon_mmlu_elementary_mathematics',
'lukaemon_mmlu_high_school_biology',
'lukaemon_mmlu_high_school_chemistry',
'lukaemon_mmlu_high_school_computer_science',
'lukaemon_mmlu_high_school_mathematics',
'lukaemon_mmlu_high_school_physics',
'lukaemon_mmlu_high_school_statistics',
'lukaemon_mmlu_machine_learning',
'lukaemon_mmlu_econometrics',
'lukaemon_mmlu_high_school_geography',
'lukaemon_mmlu_high_school_government_and_politics',
'lukaemon_mmlu_high_school_macroeconomics',
'lukaemon_mmlu_high_school_microeconomics',
'lukaemon_mmlu_high_school_psychology',
'lukaemon_mmlu_human_sexuality',
'lukaemon_mmlu_professional_psychology',
'lukaemon_mmlu_public_relations',
'lukaemon_mmlu_security_studies',
'lukaemon_mmlu_sociology',
'lukaemon_mmlu_us_foreign_policy',
'lukaemon_mmlu_business_ethics',
'lukaemon_mmlu_clinical_knowledge',
'lukaemon_mmlu_college_medicine',
'lukaemon_mmlu_global_facts',
'lukaemon_mmlu_human_aging',
'lukaemon_mmlu_management',
'lukaemon_mmlu_marketing',
'lukaemon_mmlu_medical_genetics',
'lukaemon_mmlu_miscellaneous',
'lukaemon_mmlu_nutrition',
'lukaemon_mmlu_professional_accounting',
'lukaemon_mmlu_professional_medicine',
'lukaemon_mmlu_virology',
],
weights=dict(
lukaemon_mmlu_abstract_algebra=100,
lukaemon_mmlu_anatomy=135,
lukaemon_mmlu_astronomy=152,
lukaemon_mmlu_business_ethics=100,
lukaemon_mmlu_clinical_knowledge=265,
lukaemon_mmlu_college_biology=144,
lukaemon_mmlu_college_chemistry=100,
lukaemon_mmlu_college_computer_science=100,
lukaemon_mmlu_college_mathematics=100,
lukaemon_mmlu_college_medicine=173,
lukaemon_mmlu_college_physics=102,
lukaemon_mmlu_computer_security=100,
lukaemon_mmlu_conceptual_physics=235,
lukaemon_mmlu_econometrics=114,
lukaemon_mmlu_electrical_engineering=145,
lukaemon_mmlu_elementary_mathematics=378,
lukaemon_mmlu_formal_logic=126,
lukaemon_mmlu_global_facts=100,
lukaemon_mmlu_high_school_biology=310,
lukaemon_mmlu_high_school_chemistry=203,
lukaemon_mmlu_high_school_computer_science=100,
lukaemon_mmlu_high_school_european_history=165,
lukaemon_mmlu_high_school_geography=198,
lukaemon_mmlu_high_school_government_and_politics=193,
lukaemon_mmlu_high_school_macroeconomics=390,
lukaemon_mmlu_high_school_mathematics=270,
lukaemon_mmlu_high_school_microeconomics=238,
lukaemon_mmlu_high_school_physics=151,
lukaemon_mmlu_high_school_psychology=545,
lukaemon_mmlu_high_school_statistics=216,
lukaemon_mmlu_high_school_us_history=204,
lukaemon_mmlu_high_school_world_history=237,
lukaemon_mmlu_human_aging=223,
lukaemon_mmlu_human_sexuality=131,
lukaemon_mmlu_international_law=121,
lukaemon_mmlu_jurisprudence=108,
lukaemon_mmlu_logical_fallacies=163,
lukaemon_mmlu_machine_learning=112,
lukaemon_mmlu_management=103,
lukaemon_mmlu_marketing=234,
lukaemon_mmlu_medical_genetics=100,
lukaemon_mmlu_miscellaneous=783,
lukaemon_mmlu_moral_disputes=346,
lukaemon_mmlu_moral_scenarios=895,
lukaemon_mmlu_nutrition=306,
lukaemon_mmlu_philosophy=311,
lukaemon_mmlu_prehistory=324,
lukaemon_mmlu_professional_accounting=282,
lukaemon_mmlu_professional_law=1534,
lukaemon_mmlu_professional_medicine=272,
lukaemon_mmlu_professional_psychology=612,
lukaemon_mmlu_public_relations=110,
lukaemon_mmlu_security_studies=245,
lukaemon_mmlu_sociology=201,
lukaemon_mmlu_us_foreign_policy=100,
lukaemon_mmlu_virology=166,
lukaemon_mmlu_world_religions=171)),
dict(name='cmmlu-humanities',
subsets=[
'cmmlu-arts',
'cmmlu-chinese_history',
'cmmlu-chinese_literature',
'cmmlu-college_law',
'cmmlu-global_facts',
'cmmlu-international_law',
'cmmlu-jurisprudence',
'cmmlu-logical',
'cmmlu-marxist_theory',
'cmmlu-philosophy',
'cmmlu-professional_law',
'cmmlu-world_history',
'cmmlu-world_religions',
]),
dict(name='cmmlu-stem',
subsets=[
'cmmlu-anatomy',
'cmmlu-astronomy',
'cmmlu-college_actuarial_science',
'cmmlu-college_engineering_hydrology',
'cmmlu-college_mathematics',
'cmmlu-college_medical_statistics',
'cmmlu-computer_science',
'cmmlu-conceptual_physics',
'cmmlu-electrical_engineering',
'cmmlu-elementary_mathematics',
'cmmlu-genetics',
'cmmlu-high_school_biology',
'cmmlu-high_school_chemistry',
'cmmlu-high_school_mathematics',
'cmmlu-high_school_physics',
'cmmlu-machine_learning',
'cmmlu-virology',
]),
dict(name='cmmlu-social-science',
subsets=[
'cmmlu-ancient_chinese',
'cmmlu-business_ethics',
'cmmlu-chinese_civil_service_exam',
'cmmlu-chinese_food_culture',
'cmmlu-chinese_foreign_policy',
'cmmlu-chinese_teacher_qualification',
'cmmlu-college_education',
'cmmlu-economics',
'cmmlu-education',
'cmmlu-elementary_chinese',
'cmmlu-ethnology',
'cmmlu-high_school_geography',
'cmmlu-high_school_politics',
'cmmlu-journalism',
'cmmlu-management',
'cmmlu-marketing',
'cmmlu-modern_chinese',
'cmmlu-professional_accounting',
'cmmlu-professional_psychology',
'cmmlu-public_relations',
'cmmlu-security_study',
'cmmlu-sociology',
]),
dict(name='cmmlu-other',
subsets=[
'cmmlu-agronomy',
'cmmlu-chinese_driving_rule',
'cmmlu-clinical_knowledge',
'cmmlu-college_medicine',
'cmmlu-computer_security',
'cmmlu-construction_project_management',
'cmmlu-elementary_commonsense',
'cmmlu-elementary_information_and_technology',
'cmmlu-food_science',
'cmmlu-human_sexuality',
'cmmlu-legal_and_moral_basis',
'cmmlu-nutrition',
'cmmlu-professional_medicine',
'cmmlu-sports_science',
'cmmlu-traditional_chinese_medicine',
]),
dict(name='cmmlu-china-specific',
subsets=[
'cmmlu-ancient_chinese',
'cmmlu-chinese_civil_service_exam',
'cmmlu-chinese_driving_rule',
'cmmlu-chinese_food_culture',
'cmmlu-chinese_foreign_policy',
'cmmlu-chinese_history',
'cmmlu-chinese_literature',
'cmmlu-chinese_teacher_qualification',
'cmmlu-construction_project_management',
'cmmlu-elementary_chinese',
'cmmlu-elementary_commonsense',
'cmmlu-ethnology',
'cmmlu-high_school_politics',
'cmmlu-modern_chinese',
'cmmlu-traditional_chinese_medicine',
]),
dict(name='cmmlu',
subsets=[
'cmmlu-agronomy',
'cmmlu-anatomy',
'cmmlu-ancient_chinese',
'cmmlu-arts',
'cmmlu-astronomy',
'cmmlu-business_ethics',
'cmmlu-chinese_civil_service_exam',
'cmmlu-chinese_driving_rule',
'cmmlu-chinese_food_culture',
'cmmlu-chinese_foreign_policy',
'cmmlu-chinese_history',
'cmmlu-chinese_literature',
'cmmlu-chinese_teacher_qualification',
'cmmlu-college_actuarial_science',
'cmmlu-college_education',
'cmmlu-college_engineering_hydrology',
'cmmlu-college_law',
'cmmlu-college_mathematics',
'cmmlu-college_medical_statistics',
'cmmlu-clinical_knowledge',
'cmmlu-college_medicine',
'cmmlu-computer_science',
'cmmlu-computer_security',
'cmmlu-conceptual_physics',
'cmmlu-construction_project_management',
'cmmlu-economics',
'cmmlu-education',
'cmmlu-elementary_chinese',
'cmmlu-elementary_commonsense',
'cmmlu-elementary_information_and_technology',
'cmmlu-electrical_engineering',
'cmmlu-elementary_mathematics',
'cmmlu-ethnology',
'cmmlu-food_science',
'cmmlu-genetics',
'cmmlu-global_facts',
'cmmlu-high_school_biology',
'cmmlu-high_school_chemistry',
'cmmlu-high_school_geography',
'cmmlu-high_school_mathematics',
'cmmlu-high_school_physics',
'cmmlu-high_school_politics',
'cmmlu-human_sexuality',
'cmmlu-international_law',
'cmmlu-journalism',
'cmmlu-jurisprudence',
'cmmlu-legal_and_moral_basis',
'cmmlu-logical',
'cmmlu-machine_learning',
'cmmlu-management',
'cmmlu-marketing',
'cmmlu-marxist_theory',
'cmmlu-modern_chinese',
'cmmlu-nutrition',
'cmmlu-philosophy',
'cmmlu-professional_accounting',
'cmmlu-professional_law',
'cmmlu-professional_medicine',
'cmmlu-professional_psychology',
'cmmlu-public_relations',
'cmmlu-security_study',
'cmmlu-sociology',
'cmmlu-sports_science',
'cmmlu-traditional_chinese_medicine',
'cmmlu-virology',
'cmmlu-world_history',
'cmmlu-world_religions',
]),
dict(name='ceval-stem',
subsets=[
'ceval-computer_network',
'ceval-operating_system',
'ceval-computer_architecture',
'ceval-college_programming',
'ceval-college_physics',
'ceval-college_chemistry',
'ceval-advanced_mathematics',
'ceval-probability_and_statistics',
'ceval-discrete_mathematics',
'ceval-electrical_engineer',
'ceval-metrology_engineer',
'ceval-high_school_mathematics',
'ceval-high_school_physics',
'ceval-high_school_chemistry',
'ceval-high_school_biology',
'ceval-middle_school_mathematics',
'ceval-middle_school_biology',
'ceval-middle_school_physics',
'ceval-middle_school_chemistry',
'ceval-veterinary_medicine',
]),
dict(name='ceval-social-science',
subsets=[
'ceval-college_economics',
'ceval-business_administration',
'ceval-marxism',
'ceval-mao_zedong_thought',
'ceval-education_science',
'ceval-teacher_qualification',
'ceval-high_school_politics',
'ceval-high_school_geography',
'ceval-middle_school_politics',
'ceval-middle_school_geography',
]),
dict(name='ceval-humanities',
subsets=[
'ceval-modern_chinese_history',
'ceval-ideological_and_moral_cultivation',
'ceval-logic',
'ceval-law',
'ceval-chinese_language_and_literature',
'ceval-art_studies',
'ceval-professional_tour_guide',
'ceval-legal_professional',
'ceval-high_school_chinese',
'ceval-high_school_history',
'ceval-middle_school_history',
]),
dict(name='ceval-other',
subsets=[
'ceval-civil_servant',
'ceval-sports_science',
'ceval-plant_protection',
'ceval-basic_medicine',
'ceval-clinical_medicine',
'ceval-urban_and_rural_planner',
'ceval-accountant',
'ceval-fire_engineer',
'ceval-environmental_impact_assessment_engineer',
'ceval-tax_accountant',
'ceval-physician',
]),
dict(name='ceval-hard',
subsets=[
'ceval-advanced_mathematics',
'ceval-discrete_mathematics',
'ceval-probability_and_statistics',
'ceval-college_chemistry',
'ceval-college_physics',
'ceval-high_school_mathematics',
'ceval-high_school_chemistry',
'ceval-high_school_physics',
]),
dict(name='ceval',
subsets=[
'ceval-computer_network',
'ceval-operating_system',
'ceval-computer_architecture',
'ceval-college_programming',
'ceval-college_physics',
'ceval-college_chemistry',
'ceval-advanced_mathematics',
'ceval-probability_and_statistics',
'ceval-discrete_mathematics',
'ceval-electrical_engineer',
'ceval-metrology_engineer',
'ceval-high_school_mathematics',
'ceval-high_school_physics',
'ceval-high_school_chemistry',
'ceval-high_school_biology',
'ceval-middle_school_mathematics',
'ceval-middle_school_biology',
'ceval-middle_school_physics',
'ceval-middle_school_chemistry',
'ceval-veterinary_medicine',
'ceval-college_economics',
'ceval-business_administration',
'ceval-marxism',
'ceval-mao_zedong_thought',
'ceval-education_science',
'ceval-teacher_qualification',
'ceval-high_school_politics',
'ceval-high_school_geography',
'ceval-middle_school_politics',
'ceval-middle_school_geography',
'ceval-modern_chinese_history',
'ceval-ideological_and_moral_cultivation',
'ceval-logic',
'ceval-law',
'ceval-chinese_language_and_literature',
'ceval-art_studies',
'ceval-professional_tour_guide',
'ceval-legal_professional',
'ceval-high_school_chinese',
'ceval-high_school_history',
'ceval-middle_school_history',
'ceval-civil_servant',
'ceval-sports_science',
'ceval-plant_protection',
'ceval-basic_medicine',
'ceval-clinical_medicine',
'ceval-urban_and_rural_planner',
'ceval-accountant',
'ceval-fire_engineer',
'ceval-environmental_impact_assessment_engineer',
'ceval-tax_accountant',
'ceval-physician',
]),
dict(name='ceval-test-stem',
subsets=[
'ceval-test-computer_network',
'ceval-test-operating_system',
'ceval-test-computer_architecture',
'ceval-test-college_programming',
'ceval-test-college_physics',
'ceval-test-college_chemistry',
'ceval-test-advanced_mathematics',
'ceval-test-probability_and_statistics',
'ceval-test-discrete_mathematics',
'ceval-test-electrical_engineer',
'ceval-test-metrology_engineer',
'ceval-test-high_school_mathematics',
'ceval-test-high_school_physics',
'ceval-test-high_school_chemistry',
'ceval-test-high_school_biology',
'ceval-test-middle_school_mathematics',
'ceval-test-middle_school_biology',
'ceval-test-middle_school_physics',
'ceval-test-middle_school_chemistry',
'ceval-test-veterinary_medicine',
]),
dict(name='ceval-test-social-science',
subsets=[
'ceval-test-college_economics',
'ceval-test-business_administration',
'ceval-test-marxism',
'ceval-test-mao_zedong_thought',
'ceval-test-education_science',
'ceval-test-teacher_qualification',
'ceval-test-high_school_politics',
'ceval-test-high_school_geography',
'ceval-test-middle_school_politics',
'ceval-test-middle_school_geography',
]),
dict(name='ceval-test-humanities',
subsets=[
'ceval-test-modern_chinese_history',
'ceval-test-ideological_and_moral_cultivation',
'ceval-test-logic',
'ceval-test-law',
'ceval-test-chinese_language_and_literature',
'ceval-test-art_studies',
'ceval-test-professional_tour_guide',
'ceval-test-legal_professional',
'ceval-test-high_school_chinese',
'ceval-test-high_school_history',
'ceval-test-middle_school_history',
]),
dict(name='ceval-test-other',
subsets=[
'ceval-test-civil_servant',
'ceval-test-sports_science',
'ceval-test-plant_protection',
'ceval-test-basic_medicine',
'ceval-test-clinical_medicine',
'ceval-test-urban_and_rural_planner',
'ceval-test-accountant',
'ceval-test-fire_engineer',
'ceval-test-environmental_impact_assessment_engineer',
'ceval-test-tax_accountant',
'ceval-test-physician',
]),
dict(name='ceval-test-hard',
subsets=[
'ceval-test-advanced_mathematics',
'ceval-test-discrete_mathematics',
'ceval-test-probability_and_statistics',
'ceval-test-college_chemistry',
'ceval-test-college_physics',
'ceval-test-high_school_mathematics',
'ceval-test-high_school_chemistry',
'ceval-test-high_school_physics',
]),
dict(name='ceval-test',
subsets=[
'ceval-test-computer_network',
'ceval-test-operating_system',
'ceval-test-computer_architecture',
'ceval-test-college_programming',
'ceval-test-college_physics',
'ceval-test-college_chemistry',
'ceval-test-advanced_mathematics',
'ceval-test-probability_and_statistics',
'ceval-test-discrete_mathematics',
'ceval-test-electrical_engineer',
'ceval-test-metrology_engineer',
'ceval-test-high_school_mathematics',
'ceval-test-high_school_physics',
'ceval-test-high_school_chemistry',
'ceval-test-high_school_biology',
'ceval-test-middle_school_mathematics',
'ceval-test-middle_school_biology',
'ceval-test-middle_school_physics',
'ceval-test-middle_school_chemistry',
'ceval-test-veterinary_medicine',
'ceval-test-college_economics',
'ceval-test-business_administration',
'ceval-test-marxism',
'ceval-test-mao_zedong_thought',
'ceval-test-education_science',
'ceval-test-teacher_qualification',
'ceval-test-high_school_politics',
'ceval-test-high_school_geography',
'ceval-test-middle_school_politics',
'ceval-test-middle_school_geography',
'ceval-test-modern_chinese_history',
'ceval-test-ideological_and_moral_cultivation',
'ceval-test-logic',
'ceval-test-law',
'ceval-test-chinese_language_and_literature',
'ceval-test-art_studies',
'ceval-test-professional_tour_guide',
'ceval-test-legal_professional',
'ceval-test-high_school_chinese',
'ceval-test-high_school_history',
'ceval-test-middle_school_history',
'ceval-test-civil_servant',
'ceval-test-sports_science',
'ceval-test-plant_protection',
'ceval-test-basic_medicine',
'ceval-test-clinical_medicine',
'ceval-test-urban_and_rural_planner',
'ceval-test-accountant',
'ceval-test-fire_engineer',
'ceval-test-environmental_impact_assessment_engineer',
'ceval-test-tax_accountant',
'ceval-test-physician',
]),
dict(name='bbh',
subsets=[
'bbh-temporal_sequences',
'bbh-disambiguation_qa',
'bbh-date_understanding',
'bbh-tracking_shuffled_objects_three_objects',
'bbh-penguins_in_a_table',
'bbh-geometric_shapes',
'bbh-snarks',
'bbh-ruin_names',
'bbh-tracking_shuffled_objects_seven_objects',
'bbh-tracking_shuffled_objects_five_objects',
'bbh-logical_deduction_three_objects',
'bbh-hyperbaton',
'bbh-logical_deduction_five_objects',
'bbh-logical_deduction_seven_objects',
'bbh-movie_recommendation',
'bbh-salient_translation_error_detection',
'bbh-reasoning_about_colored_objects',
'bbh-multistep_arithmetic_two',
'bbh-navigate',
'bbh-dyck_languages',
'bbh-word_sorting',
'bbh-sports_understanding',
'bbh-boolean_expressions',
'bbh-object_counting',
'bbh-formal_fallacies',
'bbh-causal_judgement',
'bbh-web_of_lies',
]),
dict(name='GaokaoBench',
subsets=[
'GaokaoBench_2010-2022_Math_II_MCQs',
'GaokaoBench_2010-2022_Math_I_MCQs',
'GaokaoBench_2010-2022_History_MCQs',
'GaokaoBench_2010-2022_Biology_MCQs',
'GaokaoBench_2010-2022_Political_Science_MCQs',
'GaokaoBench_2010-2022_Physics_MCQs',
'GaokaoBench_2010-2022_Chemistry_MCQs',
'GaokaoBench_2010-2013_English_MCQs',
'GaokaoBench_2010-2022_Chinese_Modern_Lit',
'GaokaoBench_2010-2022_English_Fill_in_Blanks',
'GaokaoBench_2012-2022_English_Cloze_Test',
'GaokaoBench_2010-2022_Geography_MCQs',
'GaokaoBench_2010-2022_English_Reading_Comp',
'GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs',
],
weights=dict(
{'GaokaoBench_2010-2013_English_MCQs': 105,
'GaokaoBench_2010-2022_Biology_MCQs': 900,
'GaokaoBench_2010-2022_Chemistry_MCQs': 744,
'GaokaoBench_2010-2022_Chinese_Lang_and_Usage_MCQs': 240,
'GaokaoBench_2010-2022_Chinese_Modern_Lit': 261,
'GaokaoBench_2010-2022_English_Fill_in_Blanks': 900.0,
'GaokaoBench_2010-2022_English_Reading_Comp': 940,
'GaokaoBench_2010-2022_Geography_MCQs': 380,
'GaokaoBench_2010-2022_History_MCQs': 1148,
'GaokaoBench_2010-2022_Math_II_MCQs': 1090,
'GaokaoBench_2010-2022_Math_I_MCQs': 1070,
'GaokaoBench_2010-2022_Physics_MCQs': 384,
'GaokaoBench_2010-2022_Political_Science_MCQs': 1280,
'GaokaoBench_2012-2022_English_Cloze_Test': 260})),
dict(name='flores_100_Indo-European-Germanic_English',
subsets=[
'flores_100_afr-eng',
'flores_100_dan-eng',
'flores_100_deu-eng',
'flores_100_isl-eng',
'flores_100_ltz-eng',
'flores_100_nld-eng',
'flores_100_nob-eng',
'flores_100_swe-eng',
]),
dict(name='flores_100_English_Indo-European-Germanic',
subsets=[
'flores_100_eng-afr',
'flores_100_eng-dan',
'flores_100_eng-deu',
'flores_100_eng-isl',
'flores_100_eng-ltz',
'flores_100_eng-nld',
'flores_100_eng-nob',
'flores_100_eng-swe',
]),
dict(name='flores_100_Indo-European-Romance_English',
subsets=[
'flores_100_ast-eng',
'flores_100_cat-eng',
'flores_100_fra-eng',
'flores_100_glg-eng',
'flores_100_oci-eng',
'flores_100_por-eng',
'flores_100_ron-eng',
'flores_100_spa-eng',
]),
dict(name='flores_100_English_Indo-European-Romance',
subsets=[
'flores_100_eng-ast',
'flores_100_eng-cat',
'flores_100_eng-fra',
'flores_100_eng-glg',
'flores_100_eng-oci',
'flores_100_eng-por',
'flores_100_eng-ron',
'flores_100_eng-spa',
]),
dict(name='flores_100_Indo-European-Slavic_English',
subsets=[
'flores_100_bel-eng',
'flores_100_bos-eng',
'flores_100_bul-eng',
'flores_100_ces-eng',
'flores_100_hrv-eng',
'flores_100_mkd-eng',
'flores_100_pol-eng',
'flores_100_rus-eng',
'flores_100_slk-eng',
'flores_100_slv-eng',
'flores_100_srp-eng',
'flores_100_ukr-eng',
]),
dict(name='flores_100_English_Indo-European-Slavic',
subsets=[
'flores_100_eng-bel',
'flores_100_eng-bos',
'flores_100_eng-bul',
'flores_100_eng-ces',
'flores_100_eng-hrv',
'flores_100_eng-mkd',
'flores_100_eng-pol',
'flores_100_eng-rus',
'flores_100_eng-slk',
'flores_100_eng-slv',
'flores_100_eng-srp',
'flores_100_eng-ukr',
]),
dict(name='flores_100_Indo-European-Indo-Aryan_English',
subsets=[
'flores_100_asm-eng',
'flores_100_ben-eng',
'flores_100_guj-eng',
'flores_100_hin-eng',
'flores_100_mar-eng',
'flores_100_npi-eng',
'flores_100_ory-eng',
'flores_100_pan-eng',
'flores_100_snd-eng',
'flores_100_urd-eng',
]),
dict(name='flores_100_English_Indo-European-Indo-Aryan',
subsets=[
'flores_100_eng-asm',
'flores_100_eng-ben',
'flores_100_eng-guj',
'flores_100_eng-hin',
'flores_100_eng-mar',
'flores_100_eng-npi',
'flores_100_eng-ory',
'flores_100_eng-pan',
'flores_100_eng-snd',
'flores_100_eng-urd',
]),
dict(name='flores_100_Indo-European-Other_English',
subsets=[
'flores_100_ckb-eng',
'flores_100_cym-eng',
'flores_100_ell-eng',
'flores_100_fas-eng',
'flores_100_gle-eng',
'flores_100_hye-eng',
'flores_100_ita-eng',
'flores_100_lav-eng',
'flores_100_lit-eng',
'flores_100_pus-eng',
'flores_100_tgk-eng',
]),
dict(name='flores_100_English_Indo-European-Other',
subsets=[
'flores_100_eng-ckb',
'flores_100_eng-cym',
'flores_100_eng-ell',
'flores_100_eng-fas',
'flores_100_eng-gle',
'flores_100_eng-hye',
'flores_100_eng-ita',
'flores_100_eng-lav',
'flores_100_eng-lit',
'flores_100_eng-pus',
'flores_100_eng-tgk',
]),
dict(name='flores_100_Austronesian_English',
subsets=[
'flores_100_ceb-eng',
'flores_100_ind-eng',
'flores_100_jav-eng',
'flores_100_mri-eng',
'flores_100_msa-eng',
'flores_100_tgl-eng',
]),
dict(name='flores_100_English_Austronesian',
subsets=[
'flores_100_eng-ceb',
'flores_100_eng-ind',
'flores_100_eng-jav',
'flores_100_eng-mri',
'flores_100_eng-msa',
'flores_100_eng-tgl',
]),
dict(name='flores_100_Atlantic-Congo_English',
subsets=[
'flores_100_ibo-eng',
'flores_100_kam-eng',
'flores_100_kea-eng',
'flores_100_lin-eng',
'flores_100_lug-eng',
'flores_100_nso-eng',
'flores_100_nya-eng',
'flores_100_sna-eng',
'flores_100_swh-eng',
'flores_100_umb-eng',
'flores_100_wol-eng',
'flores_100_xho-eng',
'flores_100_yor-eng',
'flores_100_zul-eng',
]),
dict(name='flores_100_English_Atlantic-Congo',
subsets=[
'flores_100_eng-ibo',
'flores_100_eng-kam',
'flores_100_eng-kea',
'flores_100_eng-lin',
'flores_100_eng-lug',
'flores_100_eng-nso',
'flores_100_eng-nya',
'flores_100_eng-sna',
'flores_100_eng-swh',
'flores_100_eng-umb',
'flores_100_eng-wol',
'flores_100_eng-xho',
'flores_100_eng-yor',
'flores_100_eng-zul',
]),
dict(name='flores_100_Afro-Asiatic_English',
subsets=[
'flores_100_amh-eng',
'flores_100_ara-eng',
'flores_100_ful-eng',
'flores_100_mlt-eng',
'flores_100_orm-eng',
'flores_100_som-eng',
]),
dict(name='flores_100_English_Afro-Asiatic',
subsets=[
'flores_100_eng-amh',
'flores_100_eng-ara',
'flores_100_eng-ful',
'flores_100_eng-mlt',
'flores_100_eng-orm',
'flores_100_eng-som',
]),
dict(name='flores_100_Turkic_English',
subsets=[
'flores_100_azj-eng',
'flores_100_kaz-eng',
'flores_100_kir-eng',
'flores_100_tur-eng',
'flores_100_uzb-eng',
]),
dict(name='flores_100_English_Turkic',
subsets=[
'flores_100_eng-azj',
'flores_100_eng-kaz',
'flores_100_eng-kir',
'flores_100_eng-tur',
'flores_100_eng-uzb',
]),
dict(name='flores_100_Dravidian_English',
subsets=[
'flores_100_kan-eng',
'flores_100_mal-eng',
'flores_100_tam-eng',
'flores_100_tel-eng',
]),
dict(name='flores_100_English_Dravidian',
subsets=[
'flores_100_eng-kan',
'flores_100_eng-mal',
'flores_100_eng-tam',
'flores_100_eng-tel',
]),
dict(name='flores_100_Sino-Tibetan_English',
subsets=[
'flores_100_mya-eng',
'flores_100_zho_simpl-eng',
'flores_100_zho_trad-eng',
]),
dict(name='flores_100_English_Sino-Tibetan',
subsets=[
'flores_100_eng-mya',
'flores_100_eng-zho_simpl',
'flores_100_eng-zho_trad',
]),
dict(name='flores_100_Other_English',
subsets=[
'flores_100_est-eng',
'flores_100_fin-eng',
'flores_100_hau-eng',
'flores_100_heb-eng',
'flores_100_hun-eng',
'flores_100_jpn-eng',
'flores_100_kat-eng',
'flores_100_khm-eng',
'flores_100_kor-eng',
'flores_100_lao-eng',
'flores_100_luo-eng',
'flores_100_mon-eng',
'flores_100_tha-eng',
'flores_100_vie-eng',
]),
dict(name='flores_100_English_Other',
subsets=[
'flores_100_eng-est',
'flores_100_eng-fin',
'flores_100_eng-hau',
'flores_100_eng-heb',
'flores_100_eng-hun',
'flores_100_eng-jpn',
'flores_100_eng-kat',
'flores_100_eng-khm',
'flores_100_eng-kor',
'flores_100_eng-lao',
'flores_100_eng-luo',
'flores_100_eng-mon',
'flores_100_eng-tha',
'flores_100_eng-vie',
]),
dict(name='flores_100',
subsets=[
'flores_100_afr-eng',
'flores_100_dan-eng',
'flores_100_deu-eng',
'flores_100_isl-eng',
'flores_100_ltz-eng',
'flores_100_nld-eng',
'flores_100_nob-eng',
'flores_100_swe-eng',
'flores_100_ast-eng',
'flores_100_cat-eng',
'flores_100_fra-eng',
'flores_100_glg-eng',
'flores_100_oci-eng',
'flores_100_por-eng',
'flores_100_ron-eng',
'flores_100_spa-eng',
'flores_100_bel-eng',
'flores_100_bos-eng',
'flores_100_bul-eng',
'flores_100_ces-eng',
'flores_100_hrv-eng',
'flores_100_mkd-eng',
'flores_100_pol-eng',
'flores_100_rus-eng',
'flores_100_slk-eng',
'flores_100_slv-eng',
'flores_100_srp-eng',
'flores_100_ukr-eng',
'flores_100_asm-eng',
'flores_100_ben-eng',
'flores_100_guj-eng',
'flores_100_hin-eng',
'flores_100_mar-eng',
'flores_100_npi-eng',
'flores_100_ory-eng',
'flores_100_pan-eng',
'flores_100_snd-eng',
'flores_100_urd-eng',
'flores_100_ckb-eng',
'flores_100_cym-eng',
'flores_100_ell-eng',
'flores_100_fas-eng',
'flores_100_gle-eng',
'flores_100_hye-eng',
'flores_100_ita-eng',
'flores_100_lav-eng',
'flores_100_lit-eng',
'flores_100_pus-eng',
'flores_100_tgk-eng',
'flores_100_ceb-eng',
'flores_100_ind-eng',
'flores_100_jav-eng',
'flores_100_mri-eng',
'flores_100_msa-eng',
'flores_100_tgl-eng',
'flores_100_ibo-eng',
'flores_100_kam-eng',
'flores_100_kea-eng',
'flores_100_lin-eng',
'flores_100_lug-eng',
'flores_100_nso-eng',
'flores_100_nya-eng',
'flores_100_sna-eng',
'flores_100_swh-eng',
'flores_100_umb-eng',
'flores_100_wol-eng',
'flores_100_xho-eng',
'flores_100_yor-eng',
'flores_100_zul-eng',
'flores_100_amh-eng',
'flores_100_ara-eng',
'flores_100_ful-eng',
'flores_100_mlt-eng',
'flores_100_orm-eng',
'flores_100_som-eng',
'flores_100_azj-eng',
'flores_100_kaz-eng',
'flores_100_kir-eng',
'flores_100_tur-eng',
'flores_100_uzb-eng',
'flores_100_kan-eng',
'flores_100_mal-eng',
'flores_100_tam-eng',
'flores_100_tel-eng',
'flores_100_mya-eng',
'flores_100_zho_simpl-eng',
'flores_100_zho_trad-eng',
'flores_100_est-eng',
'flores_100_fin-eng',
'flores_100_hau-eng',
'flores_100_heb-eng',
'flores_100_hun-eng',
'flores_100_jpn-eng',
'flores_100_kat-eng',
'flores_100_khm-eng',
'flores_100_kor-eng',
'flores_100_lao-eng',
'flores_100_luo-eng',
'flores_100_mon-eng',
'flores_100_tha-eng',
'flores_100_vie-eng',
'flores_100_eng-afr',
'flores_100_eng-dan',
'flores_100_eng-deu',
'flores_100_eng-isl',
'flores_100_eng-ltz',
'flores_100_eng-nld',
'flores_100_eng-nob',
'flores_100_eng-swe',
'flores_100_eng-ast',
'flores_100_eng-cat',
'flores_100_eng-fra',
'flores_100_eng-glg',
'flores_100_eng-oci',
'flores_100_eng-por',
'flores_100_eng-ron',
'flores_100_eng-spa',
'flores_100_eng-bel',
'flores_100_eng-bos',
'flores_100_eng-bul',
'flores_100_eng-ces',
'flores_100_eng-hrv',
'flores_100_eng-mkd',
'flores_100_eng-pol',
'flores_100_eng-rus',
'flores_100_eng-slk',
'flores_100_eng-slv',
'flores_100_eng-srp',
'flores_100_eng-ukr',
'flores_100_eng-asm',
'flores_100_eng-ben',
'flores_100_eng-guj',
'flores_100_eng-hin',
'flores_100_eng-mar',
'flores_100_eng-npi',
'flores_100_eng-ory',
'flores_100_eng-pan',
'flores_100_eng-snd',
'flores_100_eng-urd',
'flores_100_eng-ckb',
'flores_100_eng-cym',
'flores_100_eng-ell',
'flores_100_eng-fas',
'flores_100_eng-gle',
'flores_100_eng-hye',
'flores_100_eng-ita',
'flores_100_eng-lav',
'flores_100_eng-lit',
'flores_100_eng-pus',
'flores_100_eng-tgk',
'flores_100_eng-ceb',
'flores_100_eng-ind',
'flores_100_eng-jav',
'flores_100_eng-mri',
'flores_100_eng-msa',
'flores_100_eng-tgl',
'flores_100_eng-ibo',
'flores_100_eng-kam',
'flores_100_eng-kea',
'flores_100_eng-lin',
'flores_100_eng-lug',
'flores_100_eng-nso',
'flores_100_eng-nya',
'flores_100_eng-sna',
'flores_100_eng-swh',
'flores_100_eng-umb',
'flores_100_eng-wol',
'flores_100_eng-xho',
'flores_100_eng-yor',
'flores_100_eng-zul',
'flores_100_eng-amh',
'flores_100_eng-ara',
'flores_100_eng-ful',
'flores_100_eng-mlt',
'flores_100_eng-orm',
'flores_100_eng-som',
'flores_100_eng-azj',
'flores_100_eng-kaz',
'flores_100_eng-kir',
'flores_100_eng-tur',
'flores_100_eng-uzb',
'flores_100_eng-kan',
'flores_100_eng-mal',
'flores_100_eng-tam',
'flores_100_eng-tel',
'flores_100_eng-mya',
'flores_100_eng-zho_simpl',
'flores_100_eng-zho_trad',
'flores_100_eng-est',
'flores_100_eng-fin',
'flores_100_eng-hau',
'flores_100_eng-heb',
'flores_100_eng-hun',
'flores_100_eng-jpn',
'flores_100_eng-kat',
'flores_100_eng-khm',
'flores_100_eng-kor',
'flores_100_eng-lao',
'flores_100_eng-luo',
'flores_100_eng-mon',
'flores_100_eng-tha',
'flores_100_eng-vie',
]),
dict(name='tydiqa-goldp',
subsets=[
'tydiqa-goldp_arabic',
'tydiqa-goldp_bengali',
'tydiqa-goldp_english',
'tydiqa-goldp_finnish',
'tydiqa-goldp_indonesian',
'tydiqa-goldp_japanese',
'tydiqa-goldp_korean',
'tydiqa-goldp_russian',
'tydiqa-goldp_swahili',
'tydiqa-goldp_telugu',
'tydiqa-goldp_thai',
]),
dict(name='xiezhi',
subsets=[
'xiezhi-spec_eng',
'xiezhi-spec_chn',
'xiezhi-inter_eng',
'xiezhi-inter_chn',
]),
dict(name='scibench',
subsets=[
'scibench-atkins',
'scibench-calculus',
'scibench-chemmc',
'scibench-class',
'scibench-diff',
'scibench-fund',
'scibench-matter',
'scibench-quan',
'scibench-stat',
'scibench-thermo',
]),
dict(name='scibench_zs-cot',
subsets=[
'scibench-atkins_zs-cot',
'scibench-calculus_zs-cot',
'scibench-chemmc_zs-cot',
'scibench-class_zs-cot',
'scibench-diff_zs-cot',
'scibench-fund_zs-cot',
'scibench-matter_zs-cot',
'scibench-quan_zs-cot',
'scibench-stat_zs-cot',
'scibench-thermo_zs-cot',
]),
dict(name='scibench_fs',
subsets=[
'scibench-atkins_fs',
'scibench-calculus_fs',
'scibench-chemmc_fs',
'scibench-class_fs',
'scibench-diff_fs',
'scibench-fund_fs',
'scibench-matter_fs',
'scibench-quan_fs',
'scibench-stat_fs',
'scibench-thermo_fs',
]),
dict(name='scibench_fs-cot',
subsets=[
'scibench-atkins_fs-cot',
'scibench-calculus_fs-cot',
'scibench-chemmc_fs-cot',
'scibench-class_fs-cot',
'scibench-diff_fs-cot',
'scibench-fund_fs-cot',
'scibench-matter_fs-cot',
'scibench-quan_fs-cot',
'scibench-stat_fs-cot',
'scibench-thermo_fs-cot',
]),
])
work_dir='./outputs/default/20240306_173837'