File size: 2,627 Bytes
41d1bc5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import ast
import signal
import astunparse
from .executor_utils import function_with_timeout
from typing import List
from .executor_types import ExecuteResult, Executor
class PyExecutor(Executor):
    """Executor that runs Python source code against test snippets.

    NOTE(review): every program is run with ``exec`` against this module's
    ``globals()``, so names defined by the executed code persist in (and can
    overwrite names of) this module. Only feed it trusted or sandboxed code.
    """

    def execute(self, func: str, tests: List[str], timeout: int = 5) -> ExecuteResult:
        """Run ``func`` against each test and summarise the outcome.

        Args:
            func: Source code of the implementation under test.
            tests: Test statements, one program is built and run per entry.
                Failed entries are handed to ``get_output``, which expects
                an ``assert`` statement.
            timeout: Limit forwarded to ``function_with_timeout`` for each
                run (presumably seconds -- confirm against that helper).

        Returns:
            An ``ExecuteResult`` built from three values: whether every test
            passed, a feedback string listing passed and failed tests, and a
            tuple of per-test booleans in the same order as ``tests``.
        """
        imports = 'from typing import *'

        success_tests = []
        failed_tests = []
        # One flag per run, recorded as it happens, so duplicated test
        # strings each keep their own result and no list lookup is needed.
        state = []
        for test in tests:
            # Combine typing imports, function code and the test statement.
            program = f'{imports}\n{func}\n{test}'
            try:
                function_with_timeout(exec, (program, globals()), timeout)
            except Exception:
                # Any failure (assertion, runtime error, timeout) marks the
                # test as failed; re-evaluate the call to report its output.
                output = get_output(func, test, timeout=timeout)
                failed_tests.append(f"{test} # output: {output}")
                state.append(False)
            else:
                success_tests.append(test)
                state.append(True)

        is_passing = not failed_tests

        # NOTE(review): the "Tested passed:" heading is kept verbatim;
        # consumers may match on this exact text -- confirm before rewording.
        passed_section = "\n".join(["Tested passed:", *success_tests])
        failed_section = "\n".join(["Tests failed:", *failed_tests])
        feedback = f"{passed_section}\n\n{failed_section}"

        return ExecuteResult(is_passing, feedback, tuple(state))

    def evaluate(self, name: str, func: str, test: str, timeout: int = 5) -> bool:
        """Evaluate an implementation on Human-Eval Python.

        Builds one program from ``func``, then ``test``, then the call
        ``check(<name>)`` and runs it. Presumably ``test`` is what defines
        ``check`` -- verify against the dataset. This is dataset-specific and
        should eventually be written in a dataset-agnostic way.

        Args:
            name: Identifier passed as the argument to ``check``.
            func: Source code of the implementation.
            test: Source code of the test harness.
            timeout: Limit forwarded to ``function_with_timeout``.

        Returns:
            ``True`` if the program ran without raising, ``False`` if any
            ``Exception`` was raised while running it.
        """
        # Explicit newlines keep the program text independent of how this
        # method happens to be indented in the source file.
        code = f"{func}\n{test}\ncheck({name})\n"
        try:
            function_with_timeout(exec, (code, globals()), timeout)
            return True
        except Exception:
            return False
def get_call_str(assert_statement: str) -> str:
    """Return the source text of the expression an assert statement checks.

    The first statement of ``assert_statement`` is parsed and its asserted
    expression inspected. If that expression has a ``left`` operand (for
    example the comparison in ``assert f(x) == y``), the left operand is
    returned; otherwise (for example ``assert f(x)``) the whole asserted
    expression is returned.

    Args:
        assert_statement: Source code whose first statement is an ``assert``.

    Returns:
        The unparsed expression with surrounding whitespace stripped.

    Raises:
        SyntaxError: If ``assert_statement`` cannot be parsed.
        IndexError: If ``assert_statement`` contains no statements.
        AttributeError: If the first statement has no ``test`` attribute,
            i.e. it is not an ``assert``.
    """
    first_statement = ast.parse(assert_statement).body[0]
    asserted = first_statement.test  # type: ignore
    # Only a missing ``left`` attribute should trigger the fallback; a bare
    # ``except`` here would also swallow KeyboardInterrupt and SystemExit.
    call_node = getattr(asserted, "left", asserted)
    return astunparse.unparse(call_node).strip()
def get_output(func: str, assert_statement: str, timeout: int = 5) -> str:
    """Evaluate the call found in an assert statement and report the result.

    ``func`` is first executed (after a ``from typing import *`` line) in
    this module's globals, then the expression extracted by ``get_call_str``
    is evaluated in those same globals via ``function_with_timeout``.

    NOTE(review): on success the raw value produced by the evaluation is
    returned as-is, so it is not necessarily a ``str`` despite the
    annotation.

    Args:
        func: Source code defining the function under test.
        assert_statement: The assert statement whose call should be run.
        timeout: Limit forwarded to ``function_with_timeout``.

    Returns:
        The evaluated value, the string ``"TIMEOUT"`` when a
        ``TimeoutError`` is raised, or the message of any other exception.
    """
    module_namespace = globals()
    try:
        exec(f"from typing import *\n{func}", module_namespace)
        expression = get_call_str(assert_statement)
        return function_with_timeout(eval, (expression, module_namespace), timeout)
    except TimeoutError:
        return "TIMEOUT"
    except Exception as error:
        return str(error)
|