TwT-6's picture
Upload 2667 files
256a159 verified
raw
history blame
693 Bytes
import re
from datasets import load_dataset
from opencompass.registry import LOAD_DATASET
from .base import BaseDataset
@LOAD_DATASET.register_module()
class CValuesDataset(BaseDataset):
    """JSON-backed dataset whose two numbered replies are relabelled A/B."""

    @staticmethod
    def load(path):
        """Load JSON data from ``path`` and rename reply markers.

        Every example is expected to carry string fields ``'prompt'`` and
        ``'label'``. In the prompt, ``回复1``/``回复2`` ("reply 1/2") become
        ``回复A``/``回复B``; in the label the same markers are reduced to the
        bare letters ``A``/``B``.

        Args:
            path: Forwarded unchanged as ``data_files`` to
                ``datasets.load_dataset('json', ...)``.

        Returns:
            Whatever ``dataset.map(preprocess)`` yields for the loaded data.
        """
        dataset = load_dataset('json', data_files=path)

        # "回复" is Chinese for "reply". These literals must be the real
        # UTF-8 text: a mis-decoded (mojibake) spelling can never match the
        # dataset contents, which would silently skip the relabelling.
        # NOTE(review): restored from garbled bytes -- confirm against the
        # raw data files.
        reply = '回复'
        renames = (('1', 'A'), ('2', 'B'))

        def preprocess(example):
            """Swap numbered reply markers for lettered ones in one example."""
            prompt = example['prompt']
            label = example['label']
            for digit, letter in renames:
                marker = reply + digit
                prompt = re.sub(marker, reply + letter, prompt)
                label = re.sub(marker, letter, label)
            example['prompt'] = prompt
            example['label'] = label
            return example

        return dataset.map(preprocess)