TwT-6's picture
Upload 2667 files
256a159 verified
from datasets import Dataset, load_dataset
from opencompass.registry import LOAD_DATASET, TEXT_POSTPROCESSORS
from ..base import BaseDataset
@LOAD_DATASET.register_module()
class LongBenchlshtDataset(BaseDataset):
    """Dataset loader registered with ``LOAD_DATASET``.

    Reshapes the ``test`` split so that each record carries its reference
    answers and candidate classes together under a single ``all_labels``
    field.
    """

    @staticmethod
    def load(**kwargs):
        """Load the dataset and rebuild its ``test`` split.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset``, with its ``'test'``
            entry replaced by a ``Dataset`` whose rows have the keys
            ``'input'``, ``'context'`` and ``'all_labels'``; the latter is
            a dict holding the row's ``'answers'`` and ``'all_classes'``.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        # Walk the split row by row in a single pass. Indexing
        # ``dataset[split][column][i]`` inside a loop re-fetches the whole
        # column on every iteration, which is needlessly quadratic.
        raw_data = [
            {
                'input': row['input'],
                'context': row['context'],
                'all_labels': {
                    'answers': row['answers'],
                    'all_classes': row['all_classes'],
                },
            }
            for row in dataset[split]
        ]
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
@TEXT_POSTPROCESSORS.register_module()
def lsht_postprocess(text: str) -> str:
    """Return the first line of ``text``, ignoring leading newlines.

    Args:
        text: Raw text to post-process.

    Returns:
        Everything before the first ``'\\n'`` that follows the leading run
        of newline characters; the whole remainder if no newline follows.
    """
    without_leading_newlines = text.lstrip('\n')
    first_line, _, _ = without_leading_newlines.partition('\n')
    return first_line