import re from datasets import Dataset from opencompass.registry import LOAD_DATASET from ..base import BaseDataset from .utils import iter_jsonl @LOAD_DATASET.register_module() class InfiniteBenchcoderunDataset(BaseDataset): @staticmethod def load(path: str): dataset = list(iter_jsonl(path)) raw_data = [] for item in dataset: context = item['context'] find_result = re.findall(r'func_[0-9]+\(\-?[0-9]+\)', item['input']) func_call = find_result[0] func = func_call.split('(')[0] answer = item['answer'] raw_data.append({ 'context': context, 'func': func, 'func_call': func_call, 'answer': answer }) dataset = Dataset.from_list(raw_data) return dataset