first commit
jer.py
CHANGED
@@ -15,6 +15,7 @@
 
 import evaluate
 import datasets
+import numpy as np
 
 
 # TODO: Add BibTeX citation
@@ -28,7 +29,7 @@ year={2020}
 
 # TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+Computes precision, recall, f1 scores for joint entity-relation extraction task.
 """
 
 
@@ -53,10 +54,6 @@ Examples:
     {'accuracy': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
-
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class jer(evaluate.Metric):
     """TODO: Short description of my evaluation module."""
@@ -71,8 +68,8 @@ class jer(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
+                'predictions': datasets.features.Sequence(datasets.Value('string')),
+                'references': datasets.features.Sequence(datasets.Value('string')),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
@@ -88,8 +85,28 @@ class jer(evaluate.Metric):
 
     def _compute(self, predictions, references):
         """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+        score_dicts = [
+            self._compute_single(prediction=prediction, reference=reference)
+            for prediction, reference in zip(predictions, references)
+        ]
+        return {('mean_' + key): np.mean([scores[key] for scores in score_dicts]) for key in score_dicts[0].keys()}
+
+    def _compute_single(self, *, prediction: Iterable[str | Tuple | int], reference: Iterable[str | Tuple | int]):
+        reference_set = set(reference)
+        assert len(reference) == len(reference_set), f"Duplicates found in the reference list {reference}"
+        prediction_set = set(prediction)
+
+        TP = len(reference_set & prediction_set)
+        FP = len(prediction_set - reference_set)
+        FN = len(reference_set - prediction_set)
+
+        # Calculate metrics
+        precision = TP / (TP + FP) if TP + FP > 0 else 0
+        recall = TP / (TP + FN) if TP + FN > 0 else 0
+        f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
+
         return {
-            "accuracy": accuracy,
+            'precision': precision,
+            'recall': recall,
+            'f1': f1_score
         }
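The per-sample scoring added in _compute_single is plain exact set matching: an item counts as a true positive only if it appears verbatim in both the prediction and the reference for that sample, and _compute averages the per-sample scores under 'mean_'-prefixed keys. A minimal standalone sketch of that behaviour on toy data follows; the pipe-separated triple strings and the helper name compute_single are purely illustrative and not part of the commit, which only requires each sample to be a sequence of strings per the declared features.

import numpy as np

def compute_single(prediction, reference):
    # Exact set matching: an item is a true positive only if it appears
    # verbatim in both the prediction and the reference for that sample.
    reference_set, prediction_set = set(reference), set(prediction)
    tp = len(reference_set & prediction_set)
    fp = len(prediction_set - reference_set)
    fn = len(reference_set - prediction_set)
    precision = tp / (tp + fp) if tp + fp > 0 else 0
    recall = tp / (tp + fn) if tp + fn > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0
    return {"precision": precision, "recall": recall, "f1": f1}

# Sample 1 is matched exactly; sample 2 has one missed and one spurious triple.
predictions = [
    ["Alice|works_at|Acme"],
    ["Bob|born_in|Paris", "Bob|works_at|Acme"],
]
references = [
    ["Alice|works_at|Acme"],
    ["Bob|born_in|Paris", "Bob|lives_in|Paris"],
]
scores = [compute_single(p, r) for p, r in zip(predictions, references)]
# Mean over samples, mirroring the 'mean_' aggregation in _compute.
print({f"mean_{k}": float(np.mean([s[k] for s in scores])) for k in scores[0]})
# {'mean_precision': 0.75, 'mean_recall': 0.75, 'mean_f1': 0.75}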
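Loading and calling the module would then look roughly like the sketch below. This is not part of the commit: the path passed to evaluate.load is a placeholder for wherever jer.py lives (a local directory or the Hub id of this space), and it assumes from typing import Iterable, Tuple is also present in jer.py, since the new _compute_single signature references both names while the diff only adds the numpy import.

import evaluate

# Placeholder path: a local directory named "jer" containing jer.py,
# or the Hub id of this evaluation module space.
jer = evaluate.load("./jer")

results = jer.compute(
    predictions=[["Alice|works_at|Acme"], ["Bob|born_in|Paris", "Bob|works_at|Acme"]],
    references=[["Alice|works_at|Acme"], ["Bob|born_in|Paris", "Bob|lives_in|Paris"]],
)
# Expected: mean_precision, mean_recall, and mean_f1 of 0.75 each for this toy input.
print(results)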