File size: 845 Bytes
256a159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import json

from datasets import Dataset, DatasetDict

from .base import BaseDataset


class CommonsenseQADataset_CN(BaseDataset):
    """Loader for a JSON-lines CommonsenseQA-style multiple-choice dataset.

    Each line of the input file is parsed as one JSON object. The loader
    reads ``example['choices']['text']`` and copies its first five entries
    into top-level columns ``'A'`` .. ``'E'``, then drops the
    ``'question_concept'``, ``'id'`` and ``'choices'`` columns.
    """

    @staticmethod
    def load(path: str) -> DatasetDict:
        """Build a ``DatasetDict`` with ``'train'`` and ``'validation'`` splits.

        Both splits are built from the same file at ``path``; the split name
        does not influence which records are loaded.

        Args:
            path: Location of the JSON-lines file (one JSON object per line).

        Returns:
            A ``DatasetDict`` whose ``'train'`` and ``'validation'`` entries
            hold the same records, each with added columns ``'A'`` .. ``'E'``
            and without ``'question_concept'``, ``'id'`` and ``'choices'``.

        Raises:
            OSError: If ``path`` cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        # Read the file once: its content does not depend on the split.
        # The encoding is pinned to UTF-8 so that non-ASCII text is decoded
        # the same way regardless of the platform's locale default.
        with open(path, 'r', encoding='utf-8') as f:
            # Skip blank lines (e.g. a trailing newline at end of file),
            # which would otherwise make json.loads raise.
            records = [json.loads(line) for line in f if line.strip()]

        def pre_process(example):
            # Flatten the five answer texts into columns 'A'..'E'.
            texts = example['choices']['text']
            for i, label in enumerate('ABCDE'):
                example[label] = texts[i]
            return example

        datasetdict = DatasetDict()
        for split in ['train', 'validation']:
            # A separate Dataset object is built per split so the two
            # entries stay independent of each other.
            dataset = Dataset.from_list(records)
            dataset = dataset.map(pre_process).remove_columns(
                ['question_concept', 'id', 'choices'])
            datasetdict[split] = dataset

        return datasetdict