|
from ..utils.function_utils import multi_choice_judge |
|
|
|
""" |
|
Task: multi-choice selection |
|
Metric: Accuracy |
|
司法考试-案例分析 |
|
""" |
|
def compute_jec_ac(data_dict):
    """
    Compute accuracy and abstention rate for the JEC multiple-choice task.

    The JEC dataset has 4 options for each question: A, B, C, D.
    Scoring of each prediction is delegated to ``multi_choice_judge``; the
    intended rule is that a prediction is correct if
    1. The correct answer appears in the prediction, and
    2. Options other than the answer do not appear in the prediction.

    Args:
        data_dict: Sized collection of examples. Each example is indexed by
            the keys "origin_prompt", "prediction" and "refr". The "refr"
            value must start with "正确答案:" immediately followed by the
            option letter.

    Returns:
        A dict with "score" (mean of the per-example scores) and
        "abstention_rate" (sum of the abstention flags divided by the number
        of examples). Both are 0.0 when ``data_dict`` is empty.

    Raises:
        AssertionError: If a reference answer does not have the expected
            prefix followed by one of the option letters.
    """
    answer_prefix = "正确答案:"
    option_list = ["A", "B", "C", "D"]

    total = len(data_dict)
    if total == 0:
        # Nothing to score; avoid dividing by zero below.
        return {"score": 0.0, "abstention_rate": 0.0}

    score_list, abstentions = [], 0
    letter_pos = len(answer_prefix)
    for example in data_dict:
        question, prediction, answer = example["origin_prompt"], example["prediction"], example["refr"]

        # Slice instead of indexing so that a reference consisting of the bare
        # prefix yields "" (rejected below) rather than an IndexError.
        has_prefix = answer.startswith(answer_prefix)
        answer_letter = answer[letter_pos:letter_pos + 1] if has_prefix else ""
        if answer_letter not in option_list:
            # Raised explicitly (not via ``assert``) so the check still runs
            # under ``python -O``; the exception type is kept for callers.
            raise AssertionError(f"answer[5]: {answer}, question: {question}")

        judge = multi_choice_judge(prediction, option_list, answer_letter)
        score_list.append(judge["score"])
        abstentions += judge["abstention"]

    accuracy = sum(score_list) / total
    return {"score": accuracy, "abstention_rate": abstentions / total}
|
|