bdsaglam committed
Commit 8c5c6b9
1 Parent(s): 3cc354a

first commit

Files changed (1)
  1. jer.py +27 -10
jer.py CHANGED
@@ -15,6 +15,7 @@
 
 import evaluate
 import datasets
+import numpy as np
 
 
 # TODO: Add BibTeX citation
@@ -28,7 +29,7 @@ year={2020}
 
 # TODO: Add description of the module here
 _DESCRIPTION = """\
-This new module is designed to solve this great ML task and is crafted with a lot of care.
+Computes precision, recall, f1 scores for joint entity-relation extraction task.
 """
 
 
@@ -53,10 +54,6 @@ Examples:
     {'accuracy': 1.0}
 """
 
-# TODO: Define external resources urls if needed
-BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
-
-
 @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
 class jer(evaluate.Metric):
     """TODO: Short description of my evaluation module."""
@@ -71,8 +68,8 @@ class jer(evaluate.Metric):
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
             features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
+                'predictions': datasets.features.Sequence(datasets.Value('string')),
+                'references': datasets.features.Sequence(datasets.Value('string')),
             }),
             # Homepage of the module for documentation
             homepage="http://module.homepage",
@@ -88,8 +85,28 @@ class jer(evaluate.Metric):
 
     def _compute(self, predictions, references):
         """Returns the scores"""
-        # TODO: Compute the different scores of the module
-        accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
+        score_dicts = [
+            self._compute_single(prediction=prediction, reference=reference)
+            for prediction, reference in zip(predictions, references)
+        ]
+        return {('mean_' + key): np.mean([scores[key] for scores in score_dicts]) for key in score_dicts[0].keys()}
+
+    def _compute_single(self, *, prediction: Iterable[str | Tuple | int], reference: Iterable[str | Tuple | int]):
+        reference_set = set(reference)
+        assert len(reference) == len(reference_set), f"Duplicates found in the reference list {reference}"
+        prediction_set = set(prediction)
+
+        TP = len(reference_set & prediction_set)
+        FP = len(prediction_set - reference_set)
+        FN = len(reference_set - prediction_set)
+
+        # Calculate metrics
+        precision = TP / (TP + FP) if TP + FP > 0 else 0
+        recall = TP / (TP + FN) if TP + FN > 0 else 0
+        f1_score = 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
+
         return {
-            "accuracy": accuracy,
+            'precision': precision,
+            'recall': recall,
+            'f1': f1_score
         }
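
Usage sketch (not part of this commit): a minimal example of how the changed _compute would be called, assuming the module is published as a community metric loadable via evaluate.load("bdsaglam/jer") (the repo id is an assumption), and assuming jer.py also gains "from typing import Iterable, Tuple" or drops the type hints, since this commit references Iterable and Tuple without importing them. Each prediction and reference is a list of serialized facts; the metric returns per-example precision, recall, and F1 averaged over the batch.

import evaluate

# Hypothetical repo id; adjust to wherever this module is actually hosted.
jer_metric = evaluate.load("bdsaglam/jer")

# One example; each item is a list of extracted entity-relation facts as strings.
predictions = [["(alice | works_at | acme)", "(bob | ceo_of | acme)"]]
references = [["(alice | works_at | acme)", "(bob | founder_of | acme)"]]

# For this example TP=1, FP=1, FN=1, so precision, recall, and f1 are each 0.5;
# the returned keys are mean_precision, mean_recall, and mean_f1.
results = jer_metric.compute(predictions=predictions, references=references)
print(results)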