|
from ..utils.function_utils import compute_f1_two_sets |
|
from ..utils.rc_f1 import CJRCEvaluator |
|
|
|
|
|
""" |
|
Task: event detection (事件检测)
Metric: F1 score
|
""" |
|
# The 20 event types of the event detection (sjjc) task; compute_sjjc looks
# for each of these names inside the model output.
option_list = [
    "支付/给付",
    "欺骗",
    "搜查/扣押",
    "要求/请求",
    "卖出",
    "买入",
    "获利",
    "拘捕",
    "鉴定",
    "同意/接受",
    "供述",
    "联络",
    "帮助/救助",
    "租用/借用",
    "受伤",
    "伪造",
    "卖淫",
    "伤害人身",
    "赔偿",
    "归还/偿还",
]
|
|
|
def compute_sjjc(data_dict):
    """Compute the average F1 score for the event detection (sjjc) task.

    The sjjc task covers 20 event types, and a question may involve one or
    more of them. For every example, the set of reference event types is
    compared with the set of event types mentioned in the prediction, and
    the F1 score between the two sets is obtained from
    ``compute_f1_two_sets``.

    Args:
        data_dict: Iterable of examples. Each example is a mapping with a
            "prediction" string (the model output) and a "refr" string
            holding the reference event types separated by ";".

    Returns:
        A dict with two entries: "score", the F1 score averaged over all
        examples, and "abstention_rate", the fraction of examples whose
        prediction mentions none of the names in ``option_list``. Both are
        0.0 when ``data_dict`` contains no examples.

    Raises:
        KeyError: If an example lacks the "prediction" or "refr" key.
    """
    score_list, abstentions = [], 0

    for example in data_dict:
        prediction, answer = example["prediction"], example["refr"]

        gt_set = set(answer.split(";"))
        # An event type counts as predicted when its name occurs anywhere
        # in the model output (plain substring match).
        pred_set = {option for option in option_list if option in prediction}

        # An abstaining prediction is still scored against the reference.
        if not pred_set:
            abstentions += 1

        score_list.append(compute_f1_two_sets(gt_set, pred_set))

    # Nothing to average: avoid dividing by zero on empty input.
    if not score_list:
        return {"score": 0.0, "abstention_rate": 0.0}

    total = len(score_list)
    return {
        "score": sum(score_list) / total,
        "abstention_rate": abstentions / total,
    }
|
|
|
""" |
|
Task: trigger word extraction (触发词抽取)
Metric: F1 score
|
""" |
|
def compute_cfcy(data_dict):
    """Compute the average F1 score for the trigger word extraction (cfcy) task.

    For every example, the reference and the prediction are each split on
    ";" and the resulting items are paired position by position. Each pair
    is scored with ``CJRCEvaluator.compute_f1``; the summed pair scores are
    divided by the number of predicted items (precision) and by the number
    of reference items (recall), and the two are combined into an F1 value.

    Args:
        data_dict: Iterable of examples. Each example is a mapping with a
            "prediction" string and a "refr" string, both using ";" as the
            item separator.

    Returns:
        A dict with a single "score" entry: the per-example F1 averaged
        over all examples, or 0.0 when ``data_dict`` contains no examples.

    Raises:
        KeyError: If an example lacks the "prediction" or "refr" key.
    """
    example_scores = []

    for example in data_dict:
        prediction, answer = example["prediction"], example["refr"]

        answers = answer.split(";")
        predictions = prediction.split(";")

        # zip pairs items by position and stops at the shorter list, so
        # surplus items on either side contribute nothing to the overlap.
        overlap = sum(
            CJRCEvaluator.compute_f1(ref, hyp)
            for ref, hyp in zip(answers, predictions)
        )

        # str.split always yields at least one item, so both divisors are
        # non-zero and need no guard.
        prec = overlap / len(predictions)
        rec = overlap / len(answers)

        # The small epsilon keeps the division defined when prec == rec == 0.
        example_scores.append(2 * prec * rec / (prec + rec + 1e-10))

    # Nothing to average: avoid dividing by zero on empty input.
    if not example_scores:
        return {"score": 0.0}

    return {"score": sum(example_scores) / len(example_scores)}
|
|