|
import json |
|
|
|
from datasets import Dataset, load_dataset |
|
|
|
from opencompass.registry import LOAD_DATASET |
|
|
|
from .base import BaseDataset |
|
|
|
|
|
@LOAD_DATASET.register_module()
class BoolQDataset(BaseDataset):
    """BoolQ loader that adds an integer ``answer`` column."""

    @staticmethod
    def load(**kwargs):
        """Load the dataset and attach a numeric ``answer`` to every row.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The loaded dataset after mapping, where ``answer`` is 1 for
            rows whose ``label`` equals the string 'true' and 0 otherwise.
        """

        def _attach_answer(row):
            # Any label other than the exact string 'true' maps to 0.
            row['answer'] = int(row['label'] == 'true')
            return row

        return load_dataset(**kwargs).map(_attach_answer)
|
|
|
|
|
@LOAD_DATASET.register_module()
class BoolQDataset_V2(BaseDataset):
    """BoolQ loader that reads JSON lines and maps labels to option letters."""

    @staticmethod
    def load(path):
        """Read ``path`` line by line and build a ``Dataset``.

        Every non-blank line is parsed as a JSON object, and its ``'label'``
        value is replaced: 'true' becomes 'A' and 'false' becomes 'B'.

        Args:
            path: Location of the JSON-lines file to read.

        Returns:
            The result of ``Dataset.from_list`` over the parsed records.

        Raises:
            KeyError: If a record has no ``'label'`` key, or its label is
                neither 'true' nor 'false'.
        """
        label_to_option = {'true': 'A', 'false': 'B'}
        dataset = []
        # Explicit encoding keeps decoding independent of the platform locale.
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue
                record = json.loads(line)
                record['label'] = label_to_option[record['label']]
                dataset.append(record)
        return Dataset.from_list(dataset)
|
|
|
|
|
@LOAD_DATASET.register_module()
class BoolQDataset_V3(BaseDataset):
    """BoolQ loader that trims the passage prefix and capitalizes questions."""

    @staticmethod
    def load(path):
        """Read ``path`` line by line and build a ``Dataset``.

        Every non-blank line is parsed as a JSON object, then:

        * ``'passage'`` keeps only the text after its first ' -- '
          separator (presumably a title prefix; verify against the data).
        * ``'question'`` has its first character upper-cased; an empty
          question is left empty.

        Args:
            path: Location of the JSON-lines file to read.

        Returns:
            The result of ``Dataset.from_list`` over the parsed records.

        Raises:
            KeyError: If a record lacks ``'passage'`` or ``'question'``.
        """
        dataset = []
        # Explicit encoding keeps decoding independent of the platform locale.
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                if not line.strip():
                    continue
                record = json.loads(line)
                # Same result as split/join on ' -- ' with the first piece
                # dropped: a passage without the separator becomes ''.
                record['passage'] = record['passage'].partition(' -- ')[2]
                question = record['question']
                # Slicing instead of indexing avoids IndexError on ''.
                record['question'] = question[:1].upper() + question[1:]
                dataset.append(record)
        return Dataset.from_list(dataset)
|
|