import datasets
import evaluate
from sklearn.metrics import (
    adjusted_mutual_info_score,
    adjusted_rand_score,
    calinski_harabasz_score,
    completeness_score,
    davies_bouldin_score,
    fowlkes_mallows_score,
    homogeneity_score,
    silhouette_score,
)
from sklearn.metrics.cluster import contingency_matrix, pair_confusion_matrix

_CITATION = """
@article{scikit-learn,
  title={Scikit-learn: Machine Learning in {P}ython},
  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  journal={Journal of Machine Learning Research},
  volume={12},
  pages={2825--2830},
  year={2011}
}
"""

_DESCRIPTION = """\
This evaluator computes multiple clustering metrics to assess the quality of a
clustering. By default it works in an unsupervised setting, evaluating the
clustering from the samples and the predicted cluster labels alone. When ground
truth labels are passed as well, it computes additional supervised metrics,
which is not shown in this demo.
"""

_KWARGS_DESCRIPTION = """
Computes the quality of clustering results.
Args:
    samples: vector representations of the clustered points
    predictions: predicted cluster labels
    truth_labels (optional): ground truth labels used to compute the supervised metrics
Returns:
    silhouette_score
    davies_bouldin_score
    calinski_harabasz_score
    adjusted_rand_score (only with truth_labels)
    adjusted_mutual_info_score (only with truth_labels)
    homogeneity_score (only with truth_labels)
    completeness_score (only with truth_labels)
    fowlkes_mallows_score (only with truth_labels)
    contingency_matrix (only with truth_labels)
    pair_confusion_matrix (only with truth_labels)
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class ClusteringEvaluator(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "samples": datasets.Sequence(datasets.Value("float32")),
                    "predictions": datasets.Value("int64"),
                }
            ),
        )

    def _compute(self, samples, predictions, truth_labels=None):
        # Metrics that only need the samples and the predicted cluster labels.
        unsupervised_metrics = [
            silhouette_score,
            davies_bouldin_score,
            calinski_harabasz_score,
        ]
        # Metrics that compare the predicted labels against ground truth labels.
        supervised_metrics = [
            adjusted_rand_score,
            adjusted_mutual_info_score,
            homogeneity_score,
            completeness_score,
            fowlkes_mallows_score,
            contingency_matrix,
            pair_confusion_matrix,
        ]

        results = {}

        # The unsupervised metrics are always computed.
        for fn in unsupervised_metrics:
            results[fn.__name__] = float(fn(samples, predictions))

        # The supervised metrics are computed only when reference labels are passed.
        if truth_labels is not None:
            for fn in supervised_metrics:
                score = fn(truth_labels, predictions)
                # Matrix-valued outputs (contingency_matrix, pair_confusion_matrix)
                # cannot be cast to a scalar, so they are kept as returned.
                try:
                    score = float(score)
                except (AttributeError, TypeError):
                    pass
                results[fn.__name__] = score

        return results
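

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The synthetic blobs, the KMeans
# settings, and the direct instantiation of ClusteringEvaluator below are
# assumptions for demonstration; the module could equally be loaded through
# evaluate.load() once saved as its own metric script.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from sklearn.cluster import KMeans
    from sklearn.datasets import make_blobs

    # Toy data: 200 points drawn from 3 Gaussian blobs (hypothetical example).
    X, _ = make_blobs(n_samples=200, centers=3, random_state=0)
    labels = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(X)

    evaluator = ClusteringEvaluator()
    # Unsupervised metrics only, matching the features declared in _info();
    # truth labels could additionally be passed via the truth_labels kwarg.
    results = evaluator.compute(samples=X.tolist(), predictions=labels.tolist())
    print(results)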