Spaces:

ner4archives
/

NER4Archives-analytics

Sleeping

App Files Files Community

NER4Archives-analytics / n4a_analytics_lib /metrics_utils.py

lterriel

clean & refactor components + add doc

74e2066 about 2 years ago

raw

history blame contribute delete

2.44 kB

	# -*- coding: utf-8 -*-

	"""Collection of statistics functions.
	"""

	import numpy as np


	def percentage_agreement_pov(total_pov: int, total_annotations: int) -> float:
	    """Computes the share of agree/disagree annotations as a percentage.

	    :param total_pov: total agree/disagree annotations
	    :type total_pov: int
	    :param total_annotations: total annotations in project
	    :type total_annotations: int
	    :rtype: float
	    :return: agreement percentage rounded to 2 decimals, or 0.0 when
	        ``total_annotations`` is 0
	    """
	    # Without this guard a project with no annotations raises
	    # ZeroDivisionError instead of reporting an empty percentage.
	    if not total_annotations:
	        return 0.0
	    return round((total_pov / total_annotations) * 100, 2)


	def fleiss_kappa_function(matrix: list) -> float:
	    """Computes Fleiss' kappa for group of annotators.

	    :param matrix: a matrix of shape (:attr:'N', :attr:'k') with
	        'N' = number of subjects and 'k' = the number of categories.
	        'M[i, j]' represent the number of raters who assigned
	        the 'i'th subject to the 'j'th category. Nested lists,
	        numpy arrays and numpy matrices are accepted.
	    :type matrix: list or numpy array
	    :rtype: float
	    :return: Fleiss' kappa score rounded to 4 decimals; 1.0 when every
	        rating falls in one single category (chance agreement is 1, so
	        the kappa ratio is undefined and complete agreement is reported)
	    """
	    # Coerce to a plain ndarray: nested lists gain ``.shape`` and 2-D
	    # indexing, and ``np.matrix`` input no longer turns ``*`` below into
	    # a matrix product.
	    ratings = np.asarray(matrix)

	    n_items, _ = ratings.shape  # rows are items, columns are categories
	    # The number of annotators is read from the first row only.
	    n_annotators = float(np.sum(ratings[0, :]))
	    tot_annotations = n_items * n_annotators
	    category_sum = np.sum(ratings, axis=0)  # ratings per category over all items

	    # Chance agreement: squared share of each category, summed.
	    category_share = category_sum / tot_annotations
	    chance_agreement = float(np.sum(category_share * category_share))

	    # Every rating in one category: 1 - chance_agreement is 0 and the
	    # ratio below would be 0/0 (nan plus a RuntimeWarning).
	    if chance_agreement == 1.0:
	        return 1.0

	    # Observed agreement: per-item agreement averaged over all items.
	    per_item = (np.sum(ratings * ratings, axis=1) - n_annotators) / (
	        n_annotators * (n_annotators - 1)
	    )
	    observed_agreement = float(np.sum(per_item)) / n_items

	    kappa = (observed_agreement - chance_agreement) / (1 - chance_agreement)
	    return round(kappa, 4)


	def cohen_kappa_function(ann1: list, ann2: list) -> float:
	    """Computes Cohen kappa for pair-wise annotators.

	    :param ann1: annotations provided by first annotator
	    :type ann1: list
	    :param ann2: annotations provided by second annotator
	    :type ann2: list
	    :rtype: float
	    :return: Cohen kappa statistic rounded to 4 decimals; 1.0 when the
	        expected agreement is 1 (both annotators used one identical
	        label throughout, so the kappa ratio is undefined)
	    :raises ZeroDivisionError: if one of the annotation lists is empty
	    """
	    # Function-scope import so the block brings its own dependency.
	    from collections import Counter

	    # Observed agreement (Po): share of positions with identical labels.
	    # NOTE(review): zip() stops at the shorter input and unequal lengths
	    # are not rejected -- callers are presumably expected to pass
	    # aligned annotations; confirm.
	    matches = sum(1 for first, second in zip(ann1, ann2) if first == second)
	    observed = matches / len(ann1)

	    # Expected agreement (Pe): for each label, the product of the two
	    # annotators' label frequencies. Labels missing from ann1 contribute
	    # 0, so iterating over ann1's labels is sufficient.
	    counts1 = Counter(ann1)
	    counts2 = Counter(ann2)
	    expected = 0
	    for label, cnt1 in counts1.items():
	        expected += (cnt1 / len(ann1)) * (counts2[label] / len(ann2))

	    # Pe == 1 makes the denominator 0; report complete agreement
	    # instead of raising ZeroDivisionError.
	    if expected == 1:
	        return 1.0

	    return round((observed - expected) / (1 - expected), 4)