import json import os.path as osp from datasets import Dataset, DatasetDict from opencompass.registry import LOAD_DATASET from .base import BaseDataset @LOAD_DATASET.register_module() class CMBDataset(BaseDataset): @staticmethod def load(path: str): with open(osp.join(path, 'val.json'), 'r', encoding='utf-8') as f: val_data = json.load(f) for d in val_data: d['option_str'] = '\n'.join( [f'{k}. {v}' for k, v in d['option'].items() if len(v) > 1]) d['answer'] = 'NULL' val_dataset = Dataset.from_list(val_data) with open(osp.join(path, 'test.json'), 'r', encoding='utf-8') as f: test_data = json.load(f) for d in test_data: d['option_str'] = '\n'.join( [f'{k}. {v}' for k, v in d['option'].items() if len(v) > 1]) test_dataset = Dataset.from_list(test_data) return DatasetDict({'val': val_dataset, 'test': test_dataset})