Spaces:
Running
Running
jjyang77
committed on
Commit
•
ba3922a
1
Parent(s):
ccc64a6
refactor sample_data format check
Browse files
- Dockerfile +3 -4
- api/app.py +17 -4
- api/bigcodebench_data.py +0 -39
- api/code_execution.py +1 -1
Dockerfile
CHANGED
@@ -7,6 +7,7 @@ RUN apt-get update && apt-get install -y git g++ python3-tk zip unzip procps r-b
|
|
7 |
|
8 |
# upgrade to latest pip
|
9 |
RUN pip install --upgrade pip
|
|
|
10 |
|
11 |
# Acquire benchmark code to local
|
12 |
# ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit
|
@@ -14,13 +15,11 @@ RUN pip install --upgrade pip
|
|
14 |
# RUN cd /bigcodebench
|
15 |
# RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
|
16 |
|
17 |
-
RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.*
|
18 |
-
|
19 |
-
RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
|
20 |
-
|
21 |
# Add a new user "bigcodebenchuser"
|
22 |
RUN adduser --disabled-password --gecos "" bigcodebenchuser
|
23 |
|
|
|
|
|
24 |
COPY . .
|
25 |
|
26 |
WORKDIR /
|
|
|
7 |
|
8 |
# upgrade to latest pip
|
9 |
RUN pip install --upgrade pip
|
10 |
+
RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.* plotly
|
11 |
|
12 |
# Acquire benchmark code to local
|
13 |
# ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit
|
|
|
15 |
# RUN cd /bigcodebench
|
16 |
# RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
|
17 |
|
|
|
|
|
|
|
|
|
18 |
# Add a new user "bigcodebenchuser"
|
19 |
RUN adduser --disabled-password --gecos "" bigcodebenchuser
|
20 |
|
21 |
+
RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
|
22 |
+
|
23 |
COPY . .
|
24 |
|
25 |
WORKDIR /
|
api/app.py
CHANGED
@@ -7,10 +7,9 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
7 |
from typing import Dict, List, Tuple
|
8 |
import gc
|
9 |
|
10 |
-
from fastapi import FastAPI
|
11 |
from fastapi.responses import RedirectResponse
|
12 |
|
13 |
-
from api.bigcodebench_data import load_solutions
|
14 |
from api.code_execution import untrusted_check
|
15 |
|
16 |
Result = Tuple[str, List[bool]]
|
@@ -67,7 +66,20 @@ def create_app() -> FastAPI:
|
|
67 |
eval_results = defaultdict(list) # task_id ->
|
68 |
remainings = set()
|
69 |
|
70 |
-
for sample in
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
task_id = sample["task_id"]
|
72 |
|
73 |
solution = sample["solution"]
|
@@ -155,5 +167,6 @@ def check_correctness(
|
|
155 |
)
|
156 |
return ret
|
157 |
|
|
|
158 |
def get_groundtruth():
|
159 |
-
raise
|
|
|
7 |
from typing import Dict, List, Tuple
|
8 |
import gc
|
9 |
|
10 |
+
from fastapi import FastAPI, HTTPException
|
11 |
from fastapi.responses import RedirectResponse
|
12 |
|
|
|
13 |
from api.code_execution import untrusted_check
|
14 |
|
15 |
Result = Tuple[str, List[bool]]
|
|
|
66 |
eval_results = defaultdict(list) # task_id ->
|
67 |
remainings = set()
|
68 |
|
69 |
+
for i, sample in enumerate(samples):
|
70 |
+
# TODO: investigate why HTTPException detail is not passed to client.
|
71 |
+
|
72 |
+
for key in ["task_id", "res_id", "test", "solution", "entry_point"]:
|
73 |
+
if key not in sample:
|
74 |
+
raise HTTPException(status_code=400, detail=f"'{key}' not in sample {i}!")
|
75 |
+
|
76 |
+
if not isinstance(sample["solution"], str):
|
77 |
+
raise HTTPException(status_code=400, detail="Solution must be a string!")
|
78 |
+
|
79 |
+
sample["_identifier"] = (
|
80 |
+
sample["task_id"] + f" (line {i+1} )"
|
81 |
+
)
|
82 |
+
|
83 |
task_id = sample["task_id"]
|
84 |
|
85 |
solution = sample["solution"]
|
|
|
167 |
)
|
168 |
return ret
|
169 |
|
170 |
+
|
171 |
def get_groundtruth():
|
172 |
+
raise HTTPException(status_code=405, detail="Groundtruth execution is not implemented yet!")
|
api/bigcodebench_data.py
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
import os
|
2 |
-
import json
|
3 |
-
import gzip
|
4 |
-
from typing import Dict, Iterable
|
5 |
-
|
6 |
-
def stream_jsonl(filename: str) -> Iterable[Dict]:
|
7 |
-
"""
|
8 |
-
Parses each jsonl line and yields it as a dictionary
|
9 |
-
"""
|
10 |
-
if filename.endswith(".gz"):
|
11 |
-
with open(filename, "rb") as gzfp:
|
12 |
-
with gzip.open(gzfp, "rt") as fp:
|
13 |
-
for line in fp:
|
14 |
-
if any(not x.isspace() for x in line):
|
15 |
-
yield json.loads(line)
|
16 |
-
else:
|
17 |
-
with open(filename, "r") as fp:
|
18 |
-
for line in fp:
|
19 |
-
if any(not x.isspace() for x in line):
|
20 |
-
yield json.loads(line)
|
21 |
-
|
22 |
-
|
23 |
-
def load_solutions(samples) -> Iterable[Dict]:
|
24 |
-
"""
|
25 |
-
"""
|
26 |
-
for i, sample in enumerate(samples):
|
27 |
-
assert "task_id" in sample, "No task_id found in sample!"
|
28 |
-
assert "res_id" in sample, "No res_id found in sample!"
|
29 |
-
assert "test" in sample, "No test found in sample!"
|
30 |
-
assert "solution" in sample, "No solution found in sample!"
|
31 |
-
assert "entry_point" in sample, "No entry_point found in sample!"
|
32 |
-
assert isinstance(
|
33 |
-
sample["solution"], str
|
34 |
-
), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
|
35 |
-
|
36 |
-
sample["_identifier"] = (
|
37 |
-
sample["task_id"] + f" (line {i+1} )"
|
38 |
-
)
|
39 |
-
yield sample
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
api/code_execution.py
CHANGED
@@ -39,7 +39,7 @@ from typing import List, Tuple, Union
|
|
39 |
|
40 |
import numpy as np
|
41 |
|
42 |
-
TIMEOUT_LIMIT=
|
43 |
|
44 |
|
45 |
@contextlib.contextmanager
|
|
|
39 |
|
40 |
import numpy as np
|
41 |
|
42 |
+
TIMEOUT_LIMIT=240.0 # BCB default is 240.0
|
43 |
|
44 |
|
45 |
@contextlib.contextmanager
|