import datasets
import evaluate
from sklearn.metrics import (
    adjusted_mutual_info_score,
    adjusted_rand_score,
    calinski_harabasz_score,
    completeness_score,
    davies_bouldin_score,
    fowlkes_mallows_score,
    homogeneity_score,
    silhouette_score,
)
from sklearn.metrics.cluster import contingency_matrix, pair_confusion_matrix

_CITATION = """ | |
@article{scikit-learn, | |
title={Scikit-learn: Machine Learning in {P}ython}, | |
author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. | |
and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. | |
and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and | |
Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, | |
journal={Journal of Machine Learning Research}, | |
volume={12}, | |
pages={2825--2830}, | |
year={2011} | |
} | |
""" | |
_DESCRIPTION = """\ | |
This evaluator computes multiple clustering metrics to assess the quality of a clustering. | |
By default, the evaluator works as in an unsupervised setting, evaluating the clustering just from | |
the samples and the predictions. | |
However, it allows to compute additional metrics when truth labels are passed too, which is not shown in this demo. | |
""" | |
_KWARGS_DESCRIPTION = """ | |
Computes the quality of clustering results. | |
Args: | |
samples: vector representations | |
predictions: predicted cluster labels | |
truth_labels (optional): truth labels to compute additional metrics | |
Returns: | |
silhouete_score | |
davies_bouldin_score | |
calinski_harabasz_score | |
completeness_score | |
davies_bouldin_score | |
fowlkes_mallows_score | |
homogeneity_score | |
silhouette_score | |
contingency_matrix | |
pair_confusion_matrix | |
""" | |


class ClusteringEvaluator(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Only the mandatory inputs are declared as features; the optional
            # truth_labels are passed to compute() as an extra keyword argument.
            features=datasets.Features(
                {
                    "samples": datasets.Sequence(datasets.Value("float32")),
                    "predictions": datasets.Value("int64"),
                }
            ),
        )

    def _compute(self, samples, predictions, truth_labels=None):
        # Metrics computed from the samples and the predicted labels alone.
        unsupervised_metrics = [
            silhouette_score,
            davies_bouldin_score,
            calinski_harabasz_score,
        ]
        # Metrics that compare the predicted labels against ground-truth labels.
        supervised_metrics = [
            adjusted_rand_score,
            adjusted_mutual_info_score,
            homogeneity_score,
            completeness_score,
            fowlkes_mallows_score,
            contingency_matrix,
            pair_confusion_matrix,
        ]
        results = {}
        # Unsupervised metrics are always computed.
        for fn in unsupervised_metrics:
            results[fn.__name__] = float(fn(samples, predictions))
        # Supervised metrics are computed only if reference labels are passed.
        if truth_labels is not None:
            for fn in supervised_metrics:
                score = fn(truth_labels, predictions)
                # Scalar scores are cast to float; contingency_matrix and
                # pair_confusion_matrix return arrays and are kept as-is.
                try:
                    score = float(score)
                except (AttributeError, TypeError):
                    pass
                results[fn.__name__] = score
        return results
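

# Usage sketch (illustrative only, not part of the evaluator itself): runs the
# metric on synthetic data, first in the unsupervised setting and then with
# ground-truth labels. The toy data from `make_blobs`, the KMeans clustering,
# and every parameter value below are arbitrary example choices.
if __name__ == "__main__":
    from sklearn.cluster import KMeans
    from sklearn.datasets import make_blobs

    # Toy dataset with a known cluster structure and its ground-truth labels.
    samples, truth_labels = make_blobs(n_samples=100, centers=3, random_state=0)
    predictions = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(samples)

    evaluator = ClusteringEvaluator()

    # Unsupervised setting: only the samples and the predicted cluster labels.
    print(evaluator.compute(samples=samples.tolist(), predictions=predictions.tolist()))

    # Supervised setting: passing truth_labels adds the label-based metrics.
    print(
        evaluator.compute(
            samples=samples.tolist(),
            predictions=predictions.tolist(),
            truth_labels=truth_labels.tolist(),
        )
    )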