|
import ast |
|
import signal |
|
import astunparse |
|
|
|
from .executor_utils import function_with_timeout |
|
|
|
from typing import List |
|
from .executor_types import ExecuteResult, Executor |
|
|
|
class PyExecutor(Executor):
    """Executor that runs Python source against test code using `exec`.

    SECURITY: the supplied source is executed as-is in this process; nothing
    in this class sandboxes it. Only feed it code you are prepared to run.
    """

    def execute(self, func: str, tests: List[str], timeout: int = 5) -> ExecuteResult:
        """Run `func` against every test statement and summarise the outcome.

        Each test is executed as its own program consisting of a wildcard
        `typing` import, the source in `func`, and the test statement. A test
        passes when `function_with_timeout` returns without raising. When it
        raises, `get_output` is called for that test and its result is
        appended to the test line in the feedback.

        Args:
            func: Source code of the implementation under test.
            tests: Test statements; each one is executed after `func`.
            timeout: Limit forwarded to `function_with_timeout` and
                `get_output` for each test (presumably seconds -- confirm
                against `function_with_timeout`).

        Returns:
            `ExecuteResult(is_passing, feedback, state)` where `is_passing`
            is True when no test failed, `feedback` is the text listing the
            passed and failed tests, and `state` is a tuple of per-test
            booleans in the same order as `tests`.
        """
        imports = 'from typing import *'

        passed_tests = []
        failed_tests = []
        outcomes = []
        for test in tests:
            program = f'{imports}\n{func}\n{test}'
            try:
                # Run every program in its own namespace rather than this
                # module's globals(): names defined by one run must not leak
                # into later runs or overwrite this module's own helpers.
                # `__name__` is carried over so `__main__` guards and class
                # `__module__` values behave as they did with globals().
                function_with_timeout(exec, (program, {"__name__": __name__}), timeout)
            except Exception:
                output = get_output(func, test, timeout=timeout)
                failed_tests.append(f"{test} # output: {output}")
                outcomes.append(False)
            else:
                passed_tests.append(test)
                outcomes.append(True)

        # NOTE(review): "Tested passed:" looks like a typo for "Tests passed:";
        # it is left unchanged in case consumers match on the exact header.
        feedback = "\n".join(
            ["Tested passed:", *passed_tests, "", "Tests failed:", *failed_tests]
        )

        # `outcomes` is recorded per position, so duplicated test strings
        # each keep the result of their own run.
        return ExecuteResult(not failed_tests, feedback, tuple(outcomes))

    def evaluate(self, name: str, func: str, test: str, timeout: int = 5) -> bool:
        """Evaluate an implementation on HumanEval-style Python test code.

        Executes one program made of `func`, then `test`, then the call
        `check(<name>)`. `check` is presumably defined by `test`; this method
        does not define it. The layout is tied to HumanEval rather than being
        dataset-agnostic.

        Args:
            name: Identifier passed to `check`, i.e. the entry point in `func`.
            func: Source code of the implementation under test.
            test: Source code of the test harness.
            timeout: Limit forwarded to `function_with_timeout`.

        Returns:
            True if the program ran without raising, False if any
            `Exception` was raised while running it.
        """
        code = f"{func}\n\n{test}\n\ncheck({name})\n"
        try:
            # Fresh namespace for the same isolation reasons as in `execute`.
            function_with_timeout(exec, (code, {"__name__": __name__}), timeout)
        except Exception:
            return False
        return True
|
|
|
def get_call_str(assert_statement: str) -> str:
    """Return the source text of the expression an assert statement checks.

    The statement is parsed with `ast`. If the assert's test expression has a
    `left` attribute (for example the comparison in `assert f(x) == y`), that
    left operand is rendered; otherwise the whole test expression is.

    Args:
        assert_statement: Source text whose first statement is an `assert`.

    Returns:
        The selected expression as rendered by `astunparse.unparse`, with
        surrounding whitespace stripped.

    Raises:
        SyntaxError: If `assert_statement` cannot be parsed.
        IndexError: If the parsed source contains no statements.
        AttributeError: If the first statement has no `test` attribute.
    """
    tree = ast.parse(assert_statement)
    test_expr = tree.body[0].test
    # Fall back to the whole test expression only when it has no `left`
    # operand; a bare `except:` here would also swallow KeyboardInterrupt
    # and SystemExit.
    target = getattr(test_expr, "left", test_expr)
    return astunparse.unparse(target).strip()
|
|
|
def get_output(func: str, assert_statement: str, timeout: int = 5) -> str:
    """Evaluate the call checked by an assert and report what it produced.

    The source in `func` is executed (after a wildcard `typing` import), then
    the expression extracted from `assert_statement` by `get_call_str` is
    evaluated in the same namespace.

    SECURITY: `func` and the extracted expression are run with `exec`/`eval`
    without sandboxing; only use this on code you are prepared to run.

    Args:
        func: Source code defining the implementation under test.
        assert_statement: The assert statement whose checked expression
            should be evaluated.
        timeout: Limit forwarded to `function_with_timeout`, applied
            separately to running `func` and to evaluating the expression.

    Returns:
        The value of the evaluated expression on success. NOTE(review): this
        is the raw value, not necessarily a `str`, despite the annotation.
        Returns "TIMEOUT" if a `TimeoutError` is raised, or `str(e)` for any
        other `Exception` raised along the way.
    """
    try:
        # Private namespace instead of this module's globals(): definitions
        # from `func` must not leak into, or overwrite names in, this module.
        # `__name__` is carried over so `__main__` guards behave as before.
        namespace = {"__name__": __name__}
        # The definition phase is also run under the timeout so that source
        # whose top-level code never finishes cannot hang the caller.
        function_with_timeout(exec, (f"from typing import *\n{func}", namespace), timeout)
        func_call = get_call_str(assert_statement)
        return function_with_timeout(eval, (func_call, namespace), timeout)
    except TimeoutError:
        return "TIMEOUT"
    except Exception as e:
        return str(e)
|
|