import unittest
from typing import Tuple, List, Dict

from evaluation import SUPPORTED_EVALUATION_METRICS

from helpers import print_with_color

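# For reference, each class in SUPPORTED_EVALUATION_METRICS is assumed to expose
# a `metric_name` attribute, a no-argument constructor, and an
# `evaluate(outputs, targets)` method returning a Dict[str, float]. The stub
# below is a hypothetical sketch of that contract for readers; it is not part
# of the evaluation package and is never exercised by the tests.
class _ExampleMetricStub:
    metric_name = "example_stub"

    def evaluate(self, outputs: List[str], targets: List[str]) -> Dict[str, float]:
        # Placeholder score; real metrics compute summary quality (e.g. ROUGE).
        return {"example_stub_score": 0.0}
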

class TestEvaluationMetrics(unittest.TestCase):
    def get_summary_pairs(self, size: int = 1) -> Tuple[List[str], List[str]]:
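        """Return `size` copies of a fixed (model output, reference target) summary pair."""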
        test_output = (
            [
                """
        Glowing letters that had been hanging above
        Yankee Stadium from 1976 to 2008 were placed for auction at
        Sotheby’s on Wednesday, but were not sold. The current owner
        of the sign is Reggie Jackson, a Yankee hall-of-famer."""
            ]
            * size
        )
        test_target = (
            [
                """
        An auction for the lights from Yankee Stadium failed to
        produce any bids on Wednesday at Sotheby’s. The lights,
        currently owned by former Yankees player Reggie Jackson,
        lit the stadium from 1976 until 2008."""
            ]
            * size
        )

        return test_output, test_target

    def test_evaluate(self):
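        """Smoke-test each metric in SUPPORTED_EVALUATION_METRICS on a sample pair."""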
        print_with_color(f"{'#'*10} Testing all evaluation metrics... {'#'*10}\n", "35")

        num_eval_metrics = 0

        for metric_class in SUPPORTED_EVALUATION_METRICS:
            # if metric_class in [Rouge, RougeWe]:
            #     # TODO: Temporarily skipping Rouge/RougeWE metrics to avoid local bug.
            #     continue

            print_with_color(f"Testing {metric_class.metric_name}...", "35")

            metric = metric_class()

            test_output, test_target = self.get_summary_pairs()
            score_dict = metric.evaluate(test_output, test_target)
            print(f"{metric_class} output dictionary")
            print(score_dict)
            self.assertIsInstance(score_dict, dict)
            self.assertNotEqual(score_dict, {})

            for k, v in score_dict.items():
                self.assertIsInstance(k, str)
                self.assertIsInstance(v, float)
                # # TODO: add metric score range assertions
                # self.assertTrue(self.range[0] <= score_dict[k])
                # self.assertTrue(score_dict[k] <= self.range[1])

            print_with_color(f"{metric_class.metric_name} test complete\n", "32")
            num_eval_metrics += 1

        print_with_color(
            f"{'#'*10} Evaluation metrics test complete ({num_eval_metrics} metrics) {'#'*10}",
            "32",
        )


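# Running this file directly invokes the smoke test above for every supported
# metric; it can also be run via `python -m unittest`.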
if __name__ == "__main__":
    unittest.main()