Spaces:
Running
Running
MorishT
committed on
Commit
•
adb6e72
1
Parent(s):
3587f61
[first commit]
Browse files- FLD_metrics.py +58 -0
- README.honoka.md +5 -0
- README.md +1 -1
- requirements.txt +1 -0
FLD_metrics.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Dict, Any
|
2 |
+
from collections import defaultdict
|
3 |
+
import statistics
|
4 |
+
|
5 |
+
import datasets
|
6 |
+
import evaluate
|
7 |
+
from FLD_task import build_metrics
|
8 |
+
|
9 |
+
|
10 |
+
_DESCRIPTION = ""
|
11 |
+
_KWARGS_DESCRIPTION = ""
|
12 |
+
_CITATION = ""
|
13 |
+
|
14 |
+
|
15 |
+
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class FLDMetrics(evaluate.Metric):
    """`evaluate.Metric` that averages per-sample FLD scores over a batch.

    Two scoring callables are created with ``build_metrics``: one for the
    ``'strict'`` mode (reported under the ``strct.`` prefix) and one for the
    ``'allow_extra_steps'`` mode (reported under the ``extr_stps.`` prefix).
    """

    def __init__(self, *args, log_samples=False, **kwargs):
        """Set up the base metric and the per-mode scoring callables.

        Args:
            *args: Forwarded unchanged to ``evaluate.Metric.__init__``.
            log_samples: Stored on the instance as ``self.log_samples``; it is
                not read anywhere else in this class.
            **kwargs: Forwarded unchanged to ``evaluate.Metric.__init__``.
        """
        super().__init__(*args, **kwargs)
        # The key becomes the prefix of every result name produced by _compute.
        self._metric_funcs = dict(
            strct=build_metrics('strict'),
            extr_stps=build_metrics('allow_extra_steps'),
        )
        self.log_samples = log_samples

    def _info(self):
        """Return the `evaluate.MetricInfo` describing this metric's inputs."""
        # One prediction string, a sequence of reference strings and one
        # context string per example.
        input_schema = datasets.Features(
            {
                "predictions": datasets.Value("string"),
                "references": datasets.Sequence(datasets.Value("string")),
                "contexts": datasets.Value("string"),
            }
        )
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=input_schema,
        )

    def _compute(self, predictions, references, contexts):
        """Score every example with each scoring callable and average the results.

        Args:
            predictions: Predicted strings, one per example.
            references: Reference strings, one collection per example.
            contexts: Context per example, or ``None`` to pass ``None`` as the
                context of every example.

        Returns:
            A dict mapping ``"<metric_type>.<metric_name>"`` to the
            ``statistics.mean`` of that metric's values over all examples.
        """
        if contexts is None:
            contexts = [None] * len(predictions)

        collected: Dict[str, List[Any]] = defaultdict(list)
        for prediction, gold_refs, ctx in zip(predictions, references, contexts):
            for type_key, scorer in self._metric_funcs.items():
                sample_scores = scorer(gold_refs, prediction, context=ctx)
                for score_name, score in sample_scores.items():
                    collected[f"{type_key}.{score_name}"].append(score)

        return {
            result_key: statistics.mean(values)
            for result_key, values in collected.items()
        }
|
README.honoka.md
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Developments
|
2 |
+
See the following:
|
3 |
+
* [Creating and sharing a new evaluation](https://huggingface.co/docs/evaluate/creating_and_sharing)
|
4 |
+
* [evaluate-metric/accuracy at main](https://huggingface.co/spaces/evaluate-metric/accuracy/tree/main)
|
5 |
+
* [evaluate-metric/mase at main](https://huggingface.co/spaces/evaluate-metric/mase/blob/main/mase.py)
|
README.md
CHANGED
@@ -7,4 +7,4 @@ sdk: static
|
|
7 |
pinned: false
|
8 |
---
|
9 |
|
10 |
-
|
|
|
7 |
pinned: false
|
8 |
---
|
9 |
|
10 |
+
The metrics used in the [FLD project](https://github.com/hitachi-nlp/FLD).
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
FLD_task@git+https://github.com/hitachi-nlp/FLD-task.git@logitorch
|