# Evaluation config for the Claude 2 API model: declares the per-dataset
# prediction postprocessors and the model entry that uses them.
from opencompass.models.claude_api.claude_api import Claude
from opencompass.utils.text_postprocessors import (
    first_option_postprocess,
    last_option_postprocess,
)
from opencompass.models.claude_api.postprocessors import (
    gsm8k_postprocess,
    humaneval_claude2_postprocess,
    lcsts_postprocess,
    mbpp_postprocess,
    record_postprocess,
    strategyqa_pred_postprocess,
    yes_no_postprocess,
)

# AGIEval subset names; each one is registered below under the key
# 'agieval-<name>'.
agieval_single_choice_sets = [
    'gaokao-chinese',
    'gaokao-english',
    'gaokao-geography',
    'gaokao-history',
    'gaokao-biology',
    'gaokao-chemistry',
    'gaokao-mathqa',
    'logiqa-zh',
    'lsat-ar',
    'lsat-lr',
    'lsat-rc',
    'logiqa-en',
    'sat-math',
    'sat-en',
    'sat-en-without-passage',
    'aqua-rat',
]
agieval_multiple_choices_sets = [
    'gaokao-physics',
    'jec-qa-kd',
    'jec-qa-ca',
]

# Maps a dataset key to the postprocessor config applied to its predictions.
# NOTE(review): keys ending in '*' look like wildcard patterns matched against
# dataset abbreviations by the framework -- confirm against the consumer of
# `pred_postprocessor`.
claude_postprocessors = {
    # Option-letter answers, extracted with last_option_postprocess.
    'ceval-*': dict(type=last_option_postprocess, options='ABCD'),
    'bustm-*': dict(type=last_option_postprocess, options='AB'),
    'summedits': dict(type=last_option_postprocess, options='AB'),
    'WiC': dict(type=last_option_postprocess, options='AB'),
    # Datasets with a dedicated Claude postprocessor.
    'gsm8k': dict(type=gsm8k_postprocess),
    'openai_humaneval': dict(type=humaneval_claude2_postprocess),
    'lcsts': dict(type=lcsts_postprocess),
    'mbpp': dict(type=mbpp_postprocess),
    'strategyqa': dict(type=strategyqa_pred_postprocess),
    'WSC': dict(type=yes_no_postprocess),
    'BoolQ': dict(type=yes_no_postprocess),
    # Option-letter answers, extracted with first_option_postprocess.
    'cmnli': dict(type=first_option_postprocess, options='ABC'),
    'ocnli_fc-*': dict(type=first_option_postprocess, options='ABC'),
    'MultiRC': dict(type=yes_no_postprocess),
    'ReCoRD': dict(type=record_postprocess),
    'commonsense_qa': dict(type=last_option_postprocess, options='ABCDE'),
}

# Register every AGIEval subset (multiple-choice sets first, then the
# single-choice ones) with the same 'ABCDE' last-option postprocessor.
# A fresh dict is built per key so entries never share state. The loop
# variable `_name` remains bound at module level after the loop finishes.
for _name in (*agieval_multiple_choices_sets, *agieval_single_choice_sets):
    claude_postprocessors[f'agieval-{_name}'] = dict(
        type=last_option_postprocess,
        options='ABCDE',
    )

# Model entries exposed by this config.
models = [
    dict(
        abbr='Claude2',
        type=Claude,
        path='claude-2',
        key='YOUR_CLAUDE_KEY',  # placeholder; replace with a real API key
        query_per_second=1,
        max_out_len=2048,
        max_seq_len=2048,
        batch_size=2,
        pred_postprocessor=claude_postprocessors,
    ),
]