# flake8: noqa: E501
from datasets import Dataset

from opencompass.registry import LOAD_DATASET

from .subjective_cmp import SubjectiveCmpDataset
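
# Judge prompt templates. For each sample, the judged model's response is
# expected to be placed between the prefix (which already contains the
# question and the candidate reference answers) and the language-matching
# suffix before being sent to the judge model.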
eng_base_prefix = """
You are a judge. Please impartially judge whether an AI model's response to a question is correct based on the reference answers. You need to provide a conclusion of "correct" or "wrong," followed by the corresponding reasoning.
Note that since the reference answer is a candidate list, the AI model's response only needs to align with one item in the list to be deemed "correct."
Your judgment must strictly adhere to the following format:
Conclusion: [[Correct]]
Reasoning: xxx.
Conclusion: [[Wrong]]
Reasoning: xxx.
[Question Start]
{question}
[Question End]
[Reference Answers Start]
{ref}
[Reference Answers End]
[Model Response Start]
"""
chn_base_prefix = """
你是一个评判者,请你基于参考答案,公正地评判一个AI模型对于问题的回答是否正确。你需要给出“对或错”的结论,然后再给出相应的理由。
请注意,由于参考答案是一个候选列表,因此AI模型的回答只要符合列表中的某一项即可判断为“对”。
你的评判必须严格遵守以下格式:
结论:[[对]]
理由:xxx。
结论:[[错]]
理由:xxx。
[问题开始]
{question}
[问题结束]
[参考答案开始]
{ref}
[参考答案结束]
[模型回答开始]
"""


def prompt_construct(sample):
    """Build the judge prompt (prefix, suffix) for one sample, picking the
    template that matches the sample's language tag."""
    lan = sample['others']['lan']
    question = sample['question']
    if lan == 'zh':
        prefix = chn_base_prefix.format(question=question,
                                        ref=str(sample['others']['answers']))
        suffix = '\n[模型回答结束]\n'
    elif lan == 'en':
        prefix = eng_base_prefix.format(question=question,
                                        ref=str(sample['others']['answers']))
        suffix = '\n[Model Response End]\n'
    else:
        raise ValueError(f"Unsupported language tag: {lan!r} (expected 'zh' or 'en')")
    return prefix, suffix
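
# A minimal usage sketch. The field layout below is inferred from how
# prompt_construct reads the sample; the concrete values are hypothetical.
#
#     sample = {
#         'question': 'What is the capital of France?',
#         'others': {'lan': 'en', 'answers': ['Paris']},
#     }
#     prefix, suffix = prompt_construct(sample)
#     judge_input = prefix + model_response + suffix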


@LOAD_DATASET.register_module()
class IRDataset(SubjectiveCmpDataset):
    """Subjective dataset that attaches GPT-4 judge prompts and serialized
    reference answers to every sample."""

    def load(
        self,
        path: str,
        name: str,
    ):
        dataset = list(super().load(path, name))
        subject_dataset = []
        for data in dataset:
            # Attach the judge prompt pieces, copy 'others' into the judge
            # metadata, and expose the answers as a plain string column.
            data['gpt4_prefix'], data['gpt4_suffix'] = prompt_construct(data)
            data['judge']['others'] = data['others']
            data['ref'] = str(data['others']['answers'])
            subject_dataset.append(data)
        return Dataset.from_list(subject_dataset)
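
# Hypothetical OpenCompass config snippet showing how this loader might be
# referenced; the reader/infer/eval configs are omitted and the path/name
# values are placeholders, not taken from this file.
#
#     ir_datasets = [
#         dict(
#             type=IRDataset,
#             path='./data/subjective/ir',
#             name='information_retrieval',
#             ...
#         ),
#     ]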