from typing import List

import numpy as np
from sklearn.metrics import roc_auc_score

from opencompass.registry import ICL_EVALUATORS

from .icl_base_evaluator import BaseEvaluator


@ICL_EVALUATORS.register_module()
class AUCROCEvaluator(BaseEvaluator):
    """Calculate AUC-ROC scores and accuracy according to the predictions.

    For some datasets, accuracy alone cannot reveal the difference between
    models because of saturation. AUC-ROC scores can further examine a
    model's ability to distinguish between labels. For details, see
    https://scikit-learn.org/stable/modules/generated/sklearn.metrics.roc_auc_score.html
    """  # noqa

    def __init__(self) -> None:
        super().__init__()
    def score(self, predictions: List, references: List) -> dict:
        """Calculate AUC-ROC scores and accuracy.

        Args:
            predictions (List): List of probabilities for each class of each
                sample.
            references (List): List of target labels for each sample.

        Returns:
            dict: calculated scores.
        """
        if len(predictions) != len(references):
            return {
                'error': 'predictions and references have different lengths.'
            }
        # AUC-ROC is computed from the predicted probability of the positive
        # class (column 1 of each per-sample probability pair).
        auc_score = roc_auc_score(references, np.array(predictions)[:, 1])
        # Accuracy counts how often the most probable class matches the
        # reference label.
        accuracy = sum(
            references == np.argmax(predictions, axis=1)) / len(references)
        return dict(auc_score=auc_score * 100, accuracy=accuracy * 100)
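

# A minimal, hedged usage sketch (illustrative only, not part of the original
# file): drives AUCROCEvaluator with toy binary-classification outputs. The
# probabilities and labels below are made up; running this directly requires
# resolving the relative import above (e.g. from inside the opencompass
# package).
if __name__ == '__main__':
    evaluator = AUCROCEvaluator()
    # Each prediction is [P(class 0), P(class 1)]; references are 0/1 labels.
    toy_predictions = [[0.9, 0.1], [0.2, 0.8], [0.4, 0.6], [0.7, 0.3]]
    toy_references = [0, 1, 1, 0]
    # Every positive sample scores above every negative one here, so this
    # prints {'auc_score': 100.0, 'accuracy': 100.0}.
    print(evaluator.score(toy_predictions, toy_references))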