# -*- coding:utf-8 -*-

"""Collection of statistics functions.
"""

import numpy as np


def percentage_agreement_pov(total_pov: int, total_annotations: int) -> float:
    """Computes a percentage
    :param total_pov: total agree/disagree annotations
    :type total_pov: int
    :param total_annotations:  total annotations in project
    :type total_annotations: int
    :rtype: float
    :return: agreement percentage
    """
    return round((total_pov / total_annotations) * 100, 2)

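# A minimal usage sketch (hypothetical numbers, not taken from any project):
#
#     percentage_agreement_pov(total_pov=45, total_annotations=60)  # -> 75.0
#
# total_annotations is assumed to be non-zero; passing 0 raises ZeroDivisionError.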

def fleiss_kappa_function(matrix: np.ndarray) -> float:
    """Computes Fleiss' kappa for a group of annotators.
    :param matrix: array of shape (N, k), with N the number of subjects
        and k the number of categories; matrix[i, j] is the number of
        raters who assigned the i-th subject to the j-th category
    :type matrix: numpy.ndarray
    :rtype: float
    :return: Fleiss' kappa score
    """
    N, _ = matrix.shape  # N = number of items
    n_annotators = float(np.sum(matrix[0, :]))  # number of annotators per item
    tot_annotations = N * n_annotators  # total number of annotations
    category_sum = np.sum(matrix, axis=0)  # sum of each category over all items

    # chance agreement
    p = category_sum / tot_annotations  # the distribution of each category over all annotations
    PbarE = np.sum(p * p)  # expected chance agreement (sum of squared category proportions)

    # observed agreement
    P = (np.sum(matrix * matrix, axis=1) - n_annotators) / (n_annotators * (n_annotators - 1))
    Pbar = np.sum(P) / N  # average observed agreement over all items

    return round((Pbar - PbarE) / (1 - PbarE), 4)

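# A minimal usage sketch (hypothetical ratings: 3 subjects rated by 3 annotators
# into 2 categories; every row must sum to the same number of annotators, since
# n_annotators is read from the first row):
#
#     ratings = np.array([[3, 0],
#                         [2, 1],
#                         [0, 3]])
#     fleiss_kappa_function(ratings)  # -> 0.55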

def cohen_kappa_function(ann1: list, ann2: list) -> float:
    """Computes Cohen kappa for pair-wise annotators.
    :param ann1: annotations provided by first annotator
    :type ann1: list
    :param ann2: annotations provided by second annotator
    :type ann2: list
    :rtype: float
    :return: Cohen kappa statistic
    """
    count = 0
    for an1, an2 in zip(ann1, ann2):
        if an1 == an2:
            count += 1
    A = count / len(ann1)  # observed agreement A (Po)

    uniq = set(ann1 + ann2)
    E = 0  # expected agreement E (Pe)
    for item in uniq:
        cnt1 = ann1.count(item)
        cnt2 = ann2.count(item)
        count = (cnt1 / len(ann1)) * (cnt2 / len(ann2))
        E += count

    return round((A - E) / (1 - E), 4)
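

# A minimal usage sketch (hypothetical labels, for illustration only; both lists
# must be aligned item-by-item and of equal length):
#
#     a = ["yes", "yes", "no", "no", "yes"]
#     b = ["yes", "no", "no", "no", "yes"]
#     cohen_kappa_function(a, b)  # -> 0.6154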