File size: 845 Bytes
256a159 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import json
from datasets import Dataset, DatasetDict
from .base import BaseDataset
class CommonsenseQADataset_CN(BaseDataset):
    """Loader for a CommonsenseQA-style dataset stored as a JSON Lines file.

    Each record is expected to carry a ``choices`` mapping whose ``text``
    entry holds five answer options, plus ``question_concept`` and ``id``
    fields; those three columns are dropped after the options are
    flattened into the columns ``A`` .. ``E``.
    """

    @staticmethod
    def load(path):
        """Build a ``DatasetDict`` with ``train`` and ``validation`` splits.

        Both splits are built from the same file at ``path``.

        Args:
            path: Location of the JSON Lines file (one JSON object per line).

        Returns:
            A ``DatasetDict`` with keys ``'train'`` and ``'validation'``.
            Each split has the columns ``A`` .. ``E`` added and the columns
            ``question_concept``, ``id`` and ``choices`` removed.

        Raises:
            OSError: If ``path`` cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """

        def pre_process(example):
            # Flatten the five options into top-level columns 'A'..'E'.
            for i in range(5):
                example[chr(ord('A') + i)] = example['choices']['text'][i]
            return example

        # The file is identical for every split, so parse it once. The
        # encoding is explicit because the records contain Chinese text and
        # the platform default is not guaranteed to be UTF-8.
        with open(path, 'r', encoding='utf-8') as f:
            # Skip blank lines (e.g. a trailing newline at end of file),
            # which would otherwise make json.loads raise.
            data = [json.loads(line) for line in f if line.strip()]

        datasetdict = DatasetDict()
        for split in ['train', 'validation']:
            # Build a separate Dataset per split so the splits do not share
            # one object.
            dataset = Dataset.from_list(data)
            dataset = dataset.map(pre_process).remove_columns(
                ['question_concept', 'id', 'choices'])
            datasetdict[split] = dataset
        return datasetdict
|