jjyang77 committed
Commit da384b4 • 1 Parent(s): 0f87dc1

update samples input from file to data list
Files changed:
- .gitignore +2 -0
- Dockerfile +1 -1
- api/app.py +14 -3
- api/bigcodebench_data.py +7 -23
.gitignore
CHANGED
@@ -2,6 +2,8 @@
 **.pyc
 **/__pycache__
 
+.hypothesis/
+
 # Testing data
 /data
 
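The new `.hypothesis/` entry keeps Hypothesis's local example database out of version control; the directory appears whenever property-based tests run in the working tree. A minimal sketch of the kind of test that creates it (an illustrative test, not one from this repo):

from hypothesis import given, strategies as st

# Running any Hypothesis test writes its example database to ./.hypothesis/,
# which is why the directory is now ignored.
@given(st.lists(st.integers()))
def test_sorted_is_idempotent(xs):
    assert sorted(sorted(xs)) == sorted(xs)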
Dockerfile
CHANGED
@@ -21,7 +21,7 @@ RUN pip install --upgrade pip
 # Pre-install the dataset
 #RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
 
-RUN pip install fastapi gunicorn uvicorn[standard] httpx
+RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.*
 
 RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
 
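The `pydantic==2.*` pin backs the request-body model added in api/app.py below, which subclasses pydantic's BaseModel. A minimal sketch of the v2 API this pin assumes (the model here is an illustrative stand-in, not the one from the diff):

from pydantic import BaseModel

class Sample(BaseModel):      # stand-in for the SampleDate model defined in api/app.py
    task_id: str
    solution: str

s = Sample(task_id="BigCodeBench/0", solution="print('ok')")   # made-up values
print(s.model_dump())         # .model_dump() is the pydantic v2 call; v1 used .dict()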
api/app.py
CHANGED
@@ -7,6 +7,8 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
 from typing import Dict, List, Tuple
 import gc
 
+from pydantic import BaseModel
+
 from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
 
@@ -15,6 +17,14 @@ from api.code_execution import untrusted_check
 
 Result = Tuple[str, List[bool]]
 
+class SampleDate(BaseModel):
+    task_id: str
+    solution: str
+    code_prompt: str
+    test: str
+    entry_point: str
+    res_id: int
+
 def create_app() -> FastAPI:
 
     level = os.environ.get("LOG_LEVEL", default=logging.INFO)
@@ -33,7 +43,8 @@ def create_app() -> FastAPI:
 
     @app.post("/evaluate/")
     async def evaluate(
-        samples:
+        samples: List[SampleDate],
+        calibrate: bool = True,
         parallel: int = -1,
         min_time_limit: float = 1,
         max_as_limit: int = 30 * 1024,
@@ -42,7 +53,7 @@ def create_app() -> FastAPI:
         no_gt: bool = True,
     ) -> dict:
         """
-        Evaluate the correctness of the solutions in the given samples
+        Evaluate the correctness of the solutions in the given samples data.
         """
         if parallel < 1:
             n_workers = max(1, multiprocessing.cpu_count() // 2)
@@ -71,7 +82,7 @@ def create_app() -> FastAPI:
 
             solution = sample["solution"]
 
-            if
+            if calibrate:
                 solution = sample["code_prompt"] + "\n    pass\n" + solution
             remainings.add(sample["_identifier"])
             args = (
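With this change /evaluate/ takes the samples inline as a JSON array that FastAPI validates against SampleDate, instead of pointing at a file, and the new calibrate query parameter controls whether the code prompt (with a placeholder pass body) is prepended to each solution before execution. A hedged client sketch, assuming the Space is reachable locally and using made-up sample values (httpx is already installed in the Dockerfile):

import httpx

# Illustrative request body: one dict per sample, matching the SampleDate fields.
samples = [
    {
        "task_id": "BigCodeBench/0",                       # made-up task
        "solution": "    return 1 + 1\n",
        "code_prompt": "def task_func():\n",
        "test": "def test():\n    assert task_func() == 2\n",
        "entry_point": "task_func",
        "res_id": 0,
    }
]

resp = httpx.post(
    "http://localhost:7860/evaluate/",                     # assumed local URL of the Space
    params={"calibrate": True, "no_gt": True},
    json=samples,
    timeout=None,                                          # evaluation can be slow
)
print(resp.json())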
api/bigcodebench_data.py
CHANGED
@@ -20,27 +20,11 @@ def stream_jsonl(filename: str) -> Iterable[Dict]:
            yield json.loads(line)
 
 
-def load_solutions(
-    """We accept two formats of inputs.
-    + `sample.jsonl` which is the format from BigCodeBench, i.e., {task_id, completion or solution}.
-    + A folder which contains sub-folders named after the task_id. Each sub-folder
-      contains samples named in `[?].py` where `?` is the solution id starting with 0.
-      Different from `sample.jsonl`, the solutions must be complete (with prompt prefix).
+def load_solutions(samples) -> Iterable[Dict]:
     """
-
-
-
-
-
-
-            assert "solution" in sample, "No solution found in sample!"
-            assert isinstance(
-                sample["solution"], str
-            ), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
-
-            sample["_identifier"] = (
-                sample["task_id"] + f" (line {i+1} in {sample_path})"
-            )
-            yield sample
-    else:
-        raise NotImplementedError("Only jsonl solution output file is supported for now.")
+    """
+    for i, sample in enumerate(samples):
+        sample["_identifier"] = (
+            sample["task_id"] + f" (line {i+1} )"
+        )
+        yield sample
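load_solutions now simply annotates an in-memory list of sample dicts (the parsed request body) and yields them, so the file-path portion of the identifier is dropped. A small sketch of the new behaviour with made-up entries:

# Illustrative only: each yielded sample gains an "_identifier" built from its
# task_id and its position in the list.
samples = [
    {"task_id": "BigCodeBench/0", "solution": "...", "res_id": 0},
    {"task_id": "BigCodeBench/1", "solution": "...", "res_id": 1},
]
for sample in load_solutions(samples):
    print(sample["_identifier"])
# prints: BigCodeBench/0 (line 1 )
#         BigCodeBench/1 (line 2 )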