# Spaces: Running on CPU Upgrade
import argparse
import asyncio
import json
import os
from typing import List, Optional, Tuple

import httpx
# Command-line interface of this script; the parsed values are consumed by main().
_parser = argparse.ArgumentParser(
    description="Evaluate generated code samples through the OE-Eval "
    "BigCodeBench code execution API and report pass@1.",
)
# Required: evaluate() opens this path unconditionally, so omitting it can
# only end in a crash later on. Let argparse report the problem instead.
_parser.add_argument(
    "--filename",
    type=str,
    required=True,
    help="filename like data/codgen-...jsonl",
)
# Optional: when omitted (None), the client falls back to its local default URL.
_parser.add_argument(
    "--remoteapi",
    type=str,
    default=None,
    help="remote execution API if not running local eval",
)
def load_jsonl(filename):
    """Read a JSON Lines file into a list of decoded values.

    Args:
        filename: Path of the JSONL file to read.

    Returns:
        A list holding one decoded JSON value per non-blank line, in file
        order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(filename, "r", encoding="utf-8") as file:
        # Blank / whitespace-only lines (e.g. a trailing empty line) carry no
        # record; skipping them avoids a JSONDecodeError on an empty string.
        # json.loads itself tolerates surrounding whitespace and the newline.
        return [json.loads(line) for line in file if line.strip()]
def save_jsonl(filename, data):
    """Write an iterable of JSON-serializable values as a JSON Lines file.

    Any existing file at ``filename`` is overwritten. Missing parent
    directories are created first so that a path such as
    ``data/eval_results.jsonl`` works on a fresh checkout.

    Args:
        filename: Destination path.
        data: Iterable of values; each one is serialized with ``json.dumps``
            onto its own line.

    Returns:
        ``filename``, unchanged.

    Raises:
        OSError: If the directory or file cannot be created/written.
        TypeError: If an item is not JSON-serializable.
    """
    parent = os.path.dirname(filename)
    if parent:  # empty when filename has no directory component
        os.makedirs(parent, exist_ok=True)
    # Explicit UTF-8 so the output does not depend on the platform's locale.
    with open(filename, "w", encoding="utf-8") as file:
        file.writelines(json.dumps(item) + "\n" for item in data)
    return filename
async def call_oe_eval_bcb_client(
    samples_data: List[dict],
    calibrate: bool = True,
    parallel: int = -1,
    min_time_limit: float = 1,
    max_as_limit: int = 30 * 1024,
    max_data_limit: int = 30 * 1024,
    max_stack_limit: int = 10,
    no_gt: bool = True,
    execute_api: Optional[str] = None,
) -> Tuple[Optional[List[dict]], Optional[float]]:
    """
    OE-Eval BigCodeBench remote code execution API

    POSTs ``samples_data`` as the JSON body to the execution endpoint, with
    the remaining options forwarded unchanged as query parameters, then
    flattens the per-document results found under the response's ``"eval"``
    key.

    Args:
        samples_data: Sample records sent as the JSON request body.
        calibrate, parallel, min_time_limit, max_as_limit, max_data_limit,
            max_stack_limit, no_gt: Passed through to the API as query
            parameters; their meaning is defined by the remote service.
        execute_api: Endpoint URL. Defaults to
            ``http://localhost:9000/evaluate/`` when ``None``.

    Returns:
        ``(check_results, pass_at_1)``. ``check_results`` is a flat list of the
        result dicts returned by the API, each modified in place so that
        ``"solution"`` becomes ``"tested_completion"``, ``"status"`` becomes
        the boolean ``"passed"`` (true iff the status equals ``"pass"``) and
        ``"details"`` becomes ``"exec_result"``. ``pass_at_1`` is the fraction
        of entries with ``passed`` true. Both are ``None`` when the API
        returned no results.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        httpx.HTTPError: On transport failures or timeouts.
        KeyError: If the response lacks ``"eval"`` or a result entry lacks
            one of the expected keys.
    """
    if execute_api is None:
        execute_api = "http://localhost:9000/evaluate/"

    params = {
        "calibrate": calibrate,
        "parallel": parallel,
        "min_time_limit": min_time_limit,
        "max_as_limit": max_as_limit,
        "max_data_limit": max_data_limit,
        "max_stack_limit": max_stack_limit,
        "no_gt": no_gt,
    }
    # Even for the Full BCB dataset, total execution time should not exceed 5-10 min unless many instances of
    # generated codes are particularly mal-formed or slow. (per instance exec timeout is 30 sec)
    # NOTE: httpx applies a scalar timeout to each phase (connect/read/write/pool)
    # rather than as one wall-clock budget for the whole request.
    total_timeout = 900

    async with httpx.AsyncClient() as client:
        response = await client.post(
            execute_api, json=samples_data, params=params, timeout=total_timeout
        )
        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # further down when the service rejects the request or crashes.
        response.raise_for_status()
        results = response.json()
    print("Results received from remote API. Processing ...")

    check_results = []
    for doc in results["eval"].values():
        for rep in doc:
            # Rename the API's fields in place to this script's output schema.
            rep["tested_completion"] = rep.pop("solution")
            rep["passed"] = rep.pop("status") == "pass"
            rep["exec_result"] = rep.pop("details")
            check_results.append(rep)

    if not check_results:
        # No results to score; callers must be prepared for (None, None).
        return None, None
    pass_at_1 = sum(rep["passed"] for rep in check_results) / len(check_results)
    return check_results, pass_at_1
def evaluate(sample_file, execute_api: Optional[str] = None):
    """Evaluate the samples in a JSONL file via the execution API.

    Loads ``sample_file``, submits all records in one request through
    ``call_oe_eval_bcb_client`` (with ``calibrate=True``, ``parallel=-1`` and
    ``min_time_limit=30``), and prints the resulting pass@1.

    Args:
        sample_file: Path of the JSONL file holding the samples to evaluate.
        execute_api: Endpoint URL forwarded to the client; ``None`` selects
            the client's default.

    Returns:
        The list of per-sample result dicts, or ``None`` when the API
        returned no results (pass@1 is then printed as ``None``).
    """
    batched_code_test = load_jsonl(sample_file)
    # asyncio.run gives this synchronous entry point its own event loop for
    # the single async API call.
    results, pass_at_1 = asyncio.run(
        call_oe_eval_bcb_client(
            samples_data=batched_code_test,
            calibrate=True,
            parallel=-1,
            min_time_limit=30,
            execute_api=execute_api,
        )
    )
    print("pass@1:", pass_at_1)
    return results
def main():
    """Script entry point: evaluate the given sample file and save the results.

    Parses ``--filename`` and ``--remoteapi`` from the command line, runs
    ``evaluate`` and writes the per-sample results to
    ``data/eval_results.jsonl``. Nothing is written when the evaluation
    produced no results.
    """
    args = _parser.parse_args()
    results = evaluate(args.filename, args.remoteapi)
    if results is None:
        # evaluate() returns None when the API produced no results; passing
        # that to save_jsonl would raise TypeError while iterating.
        print("No evaluation results received; nothing saved.")
        return
    save_jsonl("data/eval_results.jsonl", results)


if __name__ == "__main__":
    main()