unnati committed
Commit 361f384
1 Parent(s): 1ce35f7

Add documentation fix + tests


- Edit _KWARGS_DESCRIPTION to describe the expected input & exception
- Add examples to the above docstring
- Remove citations and URLs (consider adding later)
- Add tests: all pairs of distinct permutations of ['A', 'B', 'C']

Files changed (3)
  1. app.py +1 -1
  2. kendall_tau_distance.py +31 -28
  3. tests.py +107 -11
app.py CHANGED

@@ -3,4 +3,4 @@ from evaluate.utils import launch_gradio_widget
 
 
 module = evaluate.load("unnati/kendall_tau_distance")
-launch_gradio_widget(module)
+launch_gradio_widget(module)
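For a quick local check without the Gradio widget, the module can also be loaded and called directly. A minimal sketch, assuming the module is published on the Hub under `unnati/kendall_tau_distance` and using integer inputs to match the `int64` features declared in `_info` below:

```python
import evaluate

# Load the metric from the Hub (same id app.py uses)
module = evaluate.load("unnati/kendall_tau_distance")

# Two rankings of the items 0, 1, 2 that differ by one adjacent swap
results = module.compute(predictions=[0, 1, 2], references=[0, 2, 1])
print(results)
# {'kendall_tau_distance': 1.0, 'normalized_kendall_tau_distance': 0.3333333333333333}
```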
kendall_tau_distance.py CHANGED

@@ -28,39 +28,40 @@ year={2020}
 
 # TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+This module calculates Kendall's tau distance between predictions and references.
+It is also known as the bubble-sort distance.
+It equals the number of adjacent swaps required to convert the predictions into the references.
 """
 
 
 # TODO: Add description of the arguments of the module here
 _KWARGS_DESCRIPTION = """
-Calculates how good are predictions given some references, using certain scores
+Calculates how good predictions are given some references, using Kendall's tau distance.
 Args:
     predictions: list of predictions to score. Each prediction
-        should be a string with tokens separated by spaces.
-    references: list of reference for each prediction. Each
-        reference should be a string with tokens separated by spaces.
+        may be a string, token, or int. The predictions should be unique.
+    references: list of references, one per prediction. Each reference
+        may be a string, token, or int. The values in predictions and references should be the same.
 Returns:
     kendall_tau_distance: Kendall's tau distance between predictions and references
     normalized_kendall_tau_distance: Kendall's tau distance between predictions and references, normalized by the number of pairs
+
+Exceptions:
+    AssertionError: If the predictions are not unique, or if the values in predictions and references are not the same
+
 Examples:
     Examples should be written in doctest format, and should illustrate how
     to use the function.
 
     >>> kendall_tau_distance = evaluate.load("kendall_tau_distance")
-    >>> results = kendall_tau_distance.compute(references=[0, 1], predictions=[0, 1])
+    >>> results = kendall_tau_distance.compute(references=[0, 1], predictions=[1, 0])
     >>> print(results)
-    {'kendall_tau_distance': 0, 'normalized_kendall_tau_distance': 0}
+    {'kendall_tau_distance': 1.0, 'normalized_kendall_tau_distance': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
 
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class kendalltaudistance(evaluate.Metric):
-    """TODO: Short description of my evaluation module."""
-
     def _info(self):
         # TODO: Specifies the evaluate.EvaluationModuleInfo object
         return evaluate.MetricInfo(

@@ -70,15 +71,12 @@ class kendalltaudistance(evaluate.Metric):
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
-            features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
-            }),
-            # Homepage of the module for documentation
-            homepage="http://module.homepage",
-            # Additional links to the codebase or references
-            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"]
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("int64"),
+                    "references": datasets.Value("int64"),
+                }
+            )
         )
 
     def _compute(self, predictions, references):

@@ -86,19 +84,24 @@ class kendalltaudistance(evaluate.Metric):
         # TODO: Compute the different scores of the module
 
         n = len(predictions)
-        assert n == len(references), "The number of predictions and references should be the same"
 
+        assert len(set(predictions)) == n, "The predictions should be unique"
+        assert set(predictions) == set(
+            references
+        ), "The values in predictions and references should be the same"
         n_discordant_pairs = 0
 
         for i in range(len(predictions)):
             j = references.index(predictions[i])
-            n_discordant_pairs += len(set(predictions[:i]).intersection(set(references[j:]))) + len(set(predictions[i+1:]).intersection(set(references[:j])))
+            n_discordant_pairs += len(
+                set(predictions[:i]).intersection(set(references[j:]))
+            ) + len(set(predictions[i + 1 :]).intersection(set(references[:j])))
 
         n_discordant_pairs = n_discordant_pairs / 2
+
         num_pairs = n * (n - 1) / 2
+
         return {
-            'kendall_tau_distance': n_discordant_pairs,
-            'normalized_kendall_tau_distance': n_discordant_pairs / num_pairs,
-        }
+            "kendall_tau_distance": n_discordant_pairs,
+            "normalized_kendall_tau_distance": n_discordant_pairs / num_pairs,
+        }
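The set-intersection bookkeeping in `_compute` counts each discordant pair once from each endpoint and then halves the total. As a cross-check, the same count can be obtained from a direct pairwise comparison; the sketch below is illustrative only and not part of the module:

```python
from itertools import combinations

def pairwise_kendall_tau_distance(predictions, references):
    """Count the pairs ordered differently in the two rankings.

    Equivalent to the set-intersection version in _compute above,
    but written as an explicit check of every unordered pair.
    """
    pos = {item: idx for idx, item in enumerate(references)}
    discordant = 0
    for a, b in combinations(predictions, 2):
        # a precedes b in predictions; the pair is discordant
        # if b precedes a in references.
        if pos[a] > pos[b]:
            discordant += 1
    return discordant

# ("A", "B", "C") -> ("C", "B", "A") reverses all three pairs,
# i.e. three adjacent swaps, so the distance is 3.
assert pairwise_kendall_tau_distance(("A", "B", "C"), ("C", "B", "A")) == 3
```

Dividing by the number of pairs, n * (n - 1) / 2, gives the normalized distance in [0, 1].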
tests.py CHANGED

@@ -1,17 +1,113 @@
 test_cases = [
     {
-        "predictions": [0, 0],
-        "references": [1, 1],
-        "result": {"metric_score": 0}
+        "predictions": ("A", "B", "C"),
+        "references": ("A", "C", "B"),
+        "result": {
+            "kendall_tau_distance": 1.0,
+            "normalized_kendall_tau_distance": 0.3333333333333333,
+        },
     },
     {
-        "predictions": [1, 1],
-        "references": [1, 1],
-        "result": {"metric_score": 1}
+        "predictions": ("A", "B", "C"),
+        "references": ("B", "A", "C"),
+        "result": {
+            "kendall_tau_distance": 1.0,
+            "normalized_kendall_tau_distance": 0.3333333333333333,
+        },
     },
     {
-        "predictions": [1, 0],
-        "references": [1, 1],
-        "result": {"metric_score": 0.5}
-    }
-]
+        "predictions": ("A", "B", "C"),
+        "references": ("B", "C", "A"),
+        "result": {
+            "kendall_tau_distance": 2.0,
+            "normalized_kendall_tau_distance": 0.6666666666666666,
+        },
+    },
+    {
+        "predictions": ("A", "B", "C"),
+        "references": ("C", "A", "B"),
+        "result": {
+            "kendall_tau_distance": 2.0,
+            "normalized_kendall_tau_distance": 0.6666666666666666,
+        },
+    },
+    {
+        "predictions": ("A", "B", "C"),
+        "references": ("C", "B", "A"),
+        "result": {"kendall_tau_distance": 3.0, "normalized_kendall_tau_distance": 1.0},
+    },
+    {
+        "predictions": ("A", "C", "B"),
+        "references": ("B", "A", "C"),
+        "result": {
+            "kendall_tau_distance": 2.0,
+            "normalized_kendall_tau_distance": 0.6666666666666666,
+        },
+    },
+    {
+        "predictions": ("A", "C", "B"),
+        "references": ("B", "C", "A"),
+        "result": {"kendall_tau_distance": 3.0, "normalized_kendall_tau_distance": 1.0},
+    },
+    {
+        "predictions": ("A", "C", "B"),
+        "references": ("C", "A", "B"),
+        "result": {
+            "kendall_tau_distance": 1.0,
+            "normalized_kendall_tau_distance": 0.3333333333333333,
+        },
+    },
+    {
+        "predictions": ("A", "C", "B"),
+        "references": ("C", "B", "A"),
+        "result": {
+            "kendall_tau_distance": 2.0,
+            "normalized_kendall_tau_distance": 0.6666666666666666,
+        },
+    },
+    {
+        "predictions": ("B", "A", "C"),
+        "references": ("B", "C", "A"),
+        "result": {
+            "kendall_tau_distance": 1.0,
+            "normalized_kendall_tau_distance": 0.3333333333333333,
+        },
+    },
+    {
+        "predictions": ("B", "A", "C"),
+        "references": ("C", "A", "B"),
+        "result": {"kendall_tau_distance": 3.0, "normalized_kendall_tau_distance": 1.0},
+    },
+    {
+        "predictions": ("B", "A", "C"),
+        "references": ("C", "B", "A"),
+        "result": {
+            "kendall_tau_distance": 2.0,
+            "normalized_kendall_tau_distance": 0.6666666666666666,
+        },
+    },
+    {
+        "predictions": ("B", "C", "A"),
+        "references": ("C", "A", "B"),
+        "result": {
+            "kendall_tau_distance": 2.0,
+            "normalized_kendall_tau_distance": 0.6666666666666666,
+        },
+    },
+    {
+        "predictions": ("B", "C", "A"),
+        "references": ("C", "B", "A"),
+        "result": {
+            "kendall_tau_distance": 1.0,
+            "normalized_kendall_tau_distance": 0.3333333333333333,
+        },
+    },
+    {
+        "predictions": ("C", "A", "B"),
+        "references": ("C", "B", "A"),
+        "result": {
+            "kendall_tau_distance": 1.0,
+            "normalized_kendall_tau_distance": 0.3333333333333333,
+        },
+    },
+]
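A minimal harness for these cases might look like the sketch below (hypothetical; `evaluate` ships its own test runner). Because `_info` declares `int64` features, the letters are mapped to integers first; relabeling both sequences with the same bijection preserves the distance:

```python
import evaluate

module = evaluate.load("unnati/kendall_tau_distance")

for case in test_cases:
    # Consistently relabel "A"/"B"/"C" as 0/1/2 in both sequences.
    index = {item: i for i, item in enumerate(case["predictions"])}
    result = module.compute(
        predictions=[index[x] for x in case["predictions"]],
        references=[index[x] for x in case["references"]],
    )
    assert result == case["result"], (case, result)
```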