Spaces:

jjyang7
/

bcb_evaluator_testing

Running

jjyang77 committed 11 days ago

Commit

ba3922a

•

1 Parent(s): ccc64a6

refactor sample_data format check

Files changed (4) hide show

Dockerfile CHANGED Viewed

@@ -7,6 +7,7 @@ RUN apt-get update && apt-get install -y git g++ python3-tk zip unzip procps r-b
 # upgrade to latest pip
 RUN pip install --upgrade pip
 # Acquire benchmark code to local
 # ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit
@@ -14,13 +15,11 @@ RUN pip install --upgrade pip
 # RUN cd /bigcodebench
 # RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
-RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.*
-RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
 # Add a new user "bigcodebenchuser"
 RUN adduser --disabled-password --gecos "" bigcodebenchuser
 COPY . .
 WORKDIR /

 # upgrade to latest pip
 RUN pip install --upgrade pip
+RUN pip install fastapi gunicorn uvicorn[standard] httpx pydantic==2.* plotly
 # Acquire benchmark code to local
 # ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit
 # RUN cd /bigcodebench
 # RUN python3 -c "from bigcodebench.data import get_bigcodebench; get_bigcodebench()"
 # Add a new user "bigcodebenchuser"
 RUN adduser --disabled-password --gecos "" bigcodebenchuser
+RUN pip install -I --timeout 2000 -r https://github.com/bigcode-project/bigcodebench-annotation/releases/download/v0.1.0/requirements.txt
 COPY . .
 WORKDIR /

api/app.py CHANGED Viewed

@@ -7,10 +7,9 @@ from concurrent.futures import ProcessPoolExecutor, as_completed
 from typing import Dict, List, Tuple
 import gc
-from fastapi import FastAPI
 from fastapi.responses import RedirectResponse
-from api.bigcodebench_data import load_solutions
 from api.code_execution import untrusted_check
 Result = Tuple[str, List[bool]]
@@ -67,7 +66,20 @@ def create_app() -> FastAPI:
             eval_results = defaultdict(list)  # task_id ->
             remainings = set()
-            for sample in load_solutions(samples):
                 task_id = sample["task_id"]
                 solution = sample["solution"]
@@ -155,5 +167,6 @@ def check_correctness(
     )
     return ret
 def get_groundtruth():
-    raise NotImplementedError("Groundtruth execution is not implemented yet.")

 from typing import Dict, List, Tuple
 import gc
+from fastapi import FastAPI, HTTPException
 from fastapi.responses import RedirectResponse
 from api.code_execution import untrusted_check
 Result = Tuple[str, List[bool]]
             eval_results = defaultdict(list)  # task_id ->
             remainings = set()
+            for i, sample in enumerate(samples):
+                # TODO: investigate why HTTPException detail is not passed to client.
+                for key in ["task_id", "res_id", "test", "solution", "entry_point"]:
+                    if key not in sample:
+                        raise HTTPException(status_code=400, detail=f"'{key}' not in sample {i}!")
+                if not isinstance(sample["solution"], str):
+                    raise HTTPException(status_code=400, detail="Solution must be a string!")
+                sample["_identifier"] = (
+                    sample["task_id"] + f" (line {i+1} )"
+                )
                 task_id = sample["task_id"]
                 solution = sample["solution"]
     )
     return ret
 def get_groundtruth():
+    raise HTTPException(status_code=405, detail="Groundtruth execution is not implemented yet!")

api/bigcodebench_data.py DELETED Viewed

@@ -1,39 +0,0 @@
-import os
-import json
-import gzip
-from typing import Dict, Iterable
-def stream_jsonl(filename: str) -> Iterable[Dict]:
-    """
-    Parses each jsonl line and yields it as a dictionary
-    """
-    if filename.endswith(".gz"):
-        with open(filename, "rb") as gzfp:
-            with gzip.open(gzfp, "rt") as fp:
-                for line in fp:
-                    if any(not x.isspace() for x in line):
-                        yield json.loads(line)
-    else:
-        with open(filename, "r") as fp:
-            for line in fp:
-                if any(not x.isspace() for x in line):
-                    yield json.loads(line)
-def load_solutions(samples) -> Iterable[Dict]:
-    """
-    """
-    for i, sample in enumerate(samples):
-        assert "task_id" in sample, "No task_id found in sample!"
-        assert "res_id" in sample, "No res_id found in sample!"
-        assert "test" in sample, "No test found in sample!"
-        assert "solution" in sample, "No solution found in sample!"
-        assert "entry_point" in sample, "No entry_point found in sample!"
-        assert isinstance(
-            sample["solution"], str
-        ), "Solution must be a string! If you have multiple solutions, please repeat the task_id."
-        sample["_identifier"] = (
-            sample["task_id"] + f" (line {i+1} )"
-        )
-        yield sample

api/code_execution.py CHANGED Viewed

@@ -39,7 +39,7 @@ from typing import List, Tuple, Union
 import numpy as np
-TIMEOUT_LIMIT=30.0  # BCB default is 240.0
 @contextlib.contextmanager

 import numpy as np
+TIMEOUT_LIMIT=240.0  # BCB default is 240.0
 @contextlib.contextmanager