import evaluate
import datasets
from datasets.features import Sequence, Value
from sklearn.metrics import roc_auc_score
import numpy as np
_DESCRIPTION = """\
Suite of threshold-agnostic metrics that provide a nuanced view
of unintended model bias, by considering the various ways that a
classifier’s score distribution can vary across designated groups.
The following are computed:
- Subgroup AUC;
- BPSN (Background Positive, Subgroup Negative) AUC; and
- BNSP (Background Negative, Subgroup Positive) AUC
"""
_CITATION = """\
@inproceedings{borkan2019nuanced,
title={Nuanced metrics for measuring unintended bias with real data for text classification},
author={Borkan, Daniel and Dixon, Lucas and Sorensen, Jeffrey and Thain, Nithum and Vasserman, Lucy},
booktitle={Companion proceedings of the 2019 world wide web conference},
pages={491--500},
year={2019}
}
"""
_KWARGS_DESCRIPTION = """\
Args:
    target list[list[str]]: list containing the list of groups targeted by each item
    label list[int]: list containing the label index for each item
    output list[list[float]]: list of model output scores for each item
    subgroups list[str] (optional): list of subgroups (drawn from target) to compute the
        metric over; defaults to every subgroup that appears in target
Returns (for each subgroup in target, plus an 'Overall' entry with the generalized
mean of each metric and the overall AUC):
    'Subgroup' : Subgroup AUC score,
    'BPSN' : BPSN (Background Positive, Subgroup Negative) AUC score,
    'BNSP' : BNSP (Background Negative, Subgroup Positive) AUC score,
Example:
    >>> from evaluate import load
    >>> target = [['Islam'],
    ...           ['Sexuality'],
    ...           ['Sexuality'],
    ...           ['Islam']]
    >>> label = [0, 0, 1, 1]
    >>> output = [[0.44452348351478577, 0.5554765462875366],
    ...           [0.4341845214366913, 0.5658154487609863],
    ...           [0.400595098733902, 0.5994048714637756],
    ...           [0.3840397894382477, 0.6159601807594299]]
    >>> metric = load('Intel/bias_auc')
    >>> metric.add_batch(target=target,
    ...                  label=label,
    ...                  output=output)
    >>> metric.compute(subgroups=None)
"""
class BiasAUC(evaluate.Metric):
def _info(self):
return evaluate.MetricInfo(
description=_DESCRIPTION,
citation=_CITATION,
inputs_description=_KWARGS_DESCRIPTION,
features=datasets.Features(
{
'target': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
'label': Value(dtype='int64', id=None),
'output': Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None),
}
),
reference_urls=["https://arxiv.org/abs/1903.04561"],
)
    def _generate_subgroup(self, targets, labels, outputs, subgroup, target_class=None):
        """Yields the label and `target_class` output score for each item whose
        list of targeted groups in `targets` contains `subgroup`
        """
target_class = target_class if target_class is not None else np.asarray(outputs).shape[-1] - 1
for target, label, result in zip(targets, labels, outputs):
if subgroup in target:
yield label, result[target_class]
    def _generate_bpsn(self, targets, labels, outputs, subgroup, target_class=None):
        """Yields the label and `target_class` output score for each item that is either
        (1) subgroup negative: `subgroup` is among the item's targeted groups in
        `targets` and its label differs from `target_class`; or
        (2) background positive: `subgroup` is not among the item's targeted groups
        and its label equals `target_class`
        """
target_class = target_class if target_class is not None else np.asarray(outputs).shape[-1] - 1
for target, label, result in zip(targets, labels, outputs):
if not target:
continue
# background positive
if subgroup not in target and label == target_class:
yield label, result[target_class]
# subgroup negative
elif subgroup in target and label != target_class:
yield label, result[target_class]
    def _generate_bnsp(self, targets, labels, outputs, subgroup, target_class=None):
        """Yields the label and `target_class` output score for each item that is either
        (1) background negative: `subgroup` is not among the item's targeted groups in
        `targets` and its label differs from `target_class`; or
        (2) subgroup positive: `subgroup` is among the item's targeted groups
        and its label equals `target_class`
        """
        # default target_class to the index of the last class
target_class = target_class if target_class is not None else np.asarray(outputs).shape[-1] - 1
for target, label, result in zip(targets, labels, outputs):
if not target:
continue
# background negative
if subgroup not in target and label != target_class:
yield label, result[target_class]
# subgroup positive
elif subgroup in target and label == target_class:
yield label, result[target_class]
def _get_auc_score(self, gen_func, *args, **kwargs):
try:
y_trues, y_preds = zip(*gen_func(*args, **kwargs))
score = roc_auc_score(y_trues, y_preds)
except ValueError:
print(f"Sample not sufficient: need negative and positive examples for both target subgroup '{args[-1]}' and background set")
score = np.nan
return score
    def _auc_by_group(self, target, label, output, subgroup):
        """Compute the Subgroup, BPSN, and BNSP AUC scores for `subgroup`
        """
return {
            'Subgroup' : self._get_auc_score(self._generate_subgroup, target, label, output, subgroup),
            'BPSN' : self._get_auc_score(self._generate_bpsn, target, label, output, subgroup),
            'BNSP' : self._get_auc_score(self._generate_bnsp, target, label, output, subgroup)
}
    def _update_overall(self, result, labels, outputs, power_value=-5):
        """Compute the generalized mean of each bias AUC across subgroups and the
        overall AUC, and add them to `result` under the 'Overall' key
        """
result['Overall'] = {}
for metric in ['Subgroup', 'BPSN', 'BNSP']:
metric_values = np.array([result[community][metric] for community in result
if community != 'Overall'])
metric_values **= power_value
mean_value = np.power(np.sum(metric_values)/(len(result) - 1), 1/power_value)
result['Overall'][f"{metric} generalized mean"] = mean_value
        # Note: the overall AUC assumes binary classification with the
        # positive-class score stored at index 1 of each output vector
        y_preds = [output[1] for output in outputs]
try:
result['Overall']["Overall AUC"] = roc_auc_score(labels, y_preds)
except ValueError:
result['Overall']["Overall AUC"] = np.nan
return result
def _compute(self, target, label, output, subgroups=None):
if subgroups is None:
subgroups = set(group for group_list in target for group in group_list)
result = {subgroup : self._auc_by_group(target, label, output, subgroup)
for subgroup in subgroups}
result = self._update_overall(result, label, output)
return result
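

# Minimal local smoke test (illustrative only, not part of the published metric
# script). It assumes the BiasAUC class can be instantiated directly instead of
# going through `evaluate.load("Intel/bias_auc")`, and uses a rounded version of
# the toy inputs from the docstring example above.
if __name__ == "__main__":
    target = [["Islam"], ["Sexuality"], ["Sexuality"], ["Islam"]]
    label = [0, 0, 1, 1]
    output = [[0.44, 0.56], [0.43, 0.57], [0.40, 0.60], [0.38, 0.62]]

    metric = BiasAUC()
    scores = metric.compute(target=target, label=label, output=output)
    for group, group_scores in scores.items():
        print(group, group_scores)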