File size: 2,677 Bytes
256a159
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import unittest

from opencompass.datasets.humaneval import humaneval_postprocess


def run_humaneval_check(completion):
    """Execute a post-processed completion as the body of ``get_fraction``.

    ``completion`` is passed through ``humaneval_postprocess`` and the result
    is placed directly beneath a fixed ``get_fraction`` signature. The
    assembled program, which ends with two assertions on ``get_fraction``,
    is then executed.

    Args:
        completion: Raw completion text to post-process and check.

    Raises:
        AssertionError: If ``get_fraction`` returns an unexpected value.
        SyntaxError: If the assembled program is not valid Python.
    """
    program = '\n'.join([
        'def get_fraction(x: float) -> float:',
        humaneval_postprocess(completion),
        '',
        'assert get_fraction(1.28) == 0.28',
        'assert get_fraction(1.0) == 0.0',
    ])
    # Run in one fresh namespace acting as both globals and locals. A bare
    # ``exec(program)`` inside a function binds ``get_fraction`` in a local
    # mapping only, so a body referring to ``get_fraction`` or to other
    # executed top-level names would raise NameError; it would also expose
    # this module's globals to the executed code.
    # NOTE: ``exec`` runs arbitrary code; only feed it trusted test fixtures.
    exec(program, {})


class TestHumaneval(unittest.TestCase):
    """Feeds differently shaped completions through ``run_humaneval_check``."""

    @staticmethod
    def _check(*completion_lines):
        # Join the pieces with newlines and hand the text to the checker.
        run_humaneval_check('\n'.join(completion_lines))

    def test_vanilla(self):
        # A bare function body indented with four spaces.
        self._check('    return x - int(x)')

    def test_python_quote(self):
        # Body wrapped in a fence that carries the ``python`` tag.
        self._check(
            '```python',
            '    return x - int(x)',
            '```',
        )

    def test_bare_quote(self):
        # Body wrapped in a fence without a language tag.
        self._check(
            '```',
            '    return x - int(x)',
            '```',
        )

    def test_error_space_quote(self):
        # Fenced body indented with two spaces instead of four.
        self._check(
            '```',
            '  return x - int(x)',
            '```',
        )

    def test_import_1(self):
        # Imports, one blank line, then a complete function definition.
        self._check(
            'import numpy as np',
            'import math',
            'from typing import List',
            '',
            'def func(x):',
            '    return x - int(x)',
        )

    def test_import_2(self):
        # Imports immediately followed by a function definition.
        self._check(
            'from typing import List',
            'import numpy as np',
            'import math',
            'def func(x):',
            '    return x - int(x)',
        )

    def test_import_3(self):
        # A single import, two blank lines, then a function definition.
        self._check(
            'import math',
            '',
            '',
            'def func(x):',
            '    return x - int(x)',
        )

    def test_comment(self):
        # A function definition whose body starts with a docstring.
        self._check(
            'def func(x: float) -> float:',
            "    '''",
            '    blah blah blah',
            '    blah blah blah',
            "    '''",
            '    return x - int(x)',
        )

    def test_additional(self):
        # A bare body followed by two blank lines and an extra function.
        self._check(
            '    return x - int(x)',
            '',
            '',
            'def func(x: float) -> float:',
            "    '''",
            '    blah blah blah',
            '    blah blah blah',
            "    '''",
            '    return x - int(x)',
        )