Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ from warnings import warn
|
|
13 |
import gc
|
14 |
|
15 |
import numpy as np
|
16 |
-
|
17 |
from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
|
18 |
from bigcodebench.data.utils import CACHE_DIR
|
19 |
from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
|
@@ -22,7 +22,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
|
22 |
|
23 |
REPO_ID = "bigcode/bigcodebench-evaluator"
|
24 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
25 |
-
|
26 |
Result = Tuple[str, List[bool]]
|
27 |
|
28 |
|
@@ -230,30 +230,30 @@ def evaluate(
|
|
230 |
return results, pass_at_k
|
231 |
|
232 |
|
233 |
-
def run_gradio():
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
|
258 |
|
259 |
def preload_gt():
|
@@ -272,12 +272,11 @@ def restart_space():
|
|
272 |
logging.error(f"Failed to restart space: {e}")
|
273 |
|
274 |
|
275 |
-
if __name__ == "__main__":
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
# evaluate("complete", "hard", "meta-llama--Llama-3.2-3B-Instruct--bigcodebench-instruct--vllm-0-1.jsonl")
|
|
|
13 |
import gc
|
14 |
|
15 |
import numpy as np
|
16 |
+
from huggingface_hub import HfApi
|
17 |
from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
|
18 |
from bigcodebench.data.utils import CACHE_DIR
|
19 |
from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
|
|
|
22 |
|
23 |
REPO_ID = "bigcode/bigcodebench-evaluator"
|
24 |
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
25 |
+
API = HfApi(token=HF_TOKEN)
|
26 |
Result = Tuple[str, List[bool]]
|
27 |
|
28 |
|
|
|
230 |
return results, pass_at_k
|
231 |
|
232 |
|
233 |
+
# def run_gradio():
|
234 |
+
interface = gr.Interface(
|
235 |
+
fn=evaluate,
|
236 |
+
inputs=[
|
237 |
+
gr.Dropdown(["complete", "instruct"], label="BigCodeBench Split"),
|
238 |
+
gr.Dropdown(["full", "hard"], label="BigCodeBench Subset"),
|
239 |
+
gr.File(label="Samples Path (.jsonl)"),
|
240 |
+
gr.Textbox(label="Pass k Values (comma-separated)", value="1,5,10"),
|
241 |
+
gr.Slider(-1, multiprocessing.cpu_count(), step=1, label="Parallel Workers", value=-1),
|
242 |
+
gr.Slider(0.1, 10, step=0.1, label="Min Time Limit", value=1),
|
243 |
+
gr.Slider(1, 100 * 1024, step=1024, label="Max AS Limit", value=30 * 1024),
|
244 |
+
gr.Slider(1, 100 * 1024, step=1024, label="Max Data Limit", value=30 * 1024),
|
245 |
+
gr.Slider(1, 100, step=1, label="Max Stack Limit", value=10),
|
246 |
+
gr.Checkbox(label="Check GT Only"),
|
247 |
+
gr.Checkbox(label="No GT"),
|
248 |
+
],
|
249 |
+
outputs=[
|
250 |
+
gr.JSON(label="Results"),
|
251 |
+
gr.JSON(label="Eval Results"),
|
252 |
+
],
|
253 |
+
# concurrency_limit=None
|
254 |
+
)
|
255 |
+
interface.queue(default_concurrency_limit=None)
|
256 |
+
# interface.launch(show_error=True)
|
257 |
|
258 |
|
259 |
def preload_gt():
|
|
|
272 |
logging.error(f"Failed to restart space: {e}")
|
273 |
|
274 |
|
275 |
+
# if __name__ == "__main__":
|
276 |
+
preload_gt()
|
277 |
+
# run_gradio()
|
278 |
+
|
279 |
+
scheduler = BackgroundScheduler()
|
280 |
+
scheduler.add_job(restart_space, "interval", hours=1) # Restart every 1h
|
281 |
+
logging.info("Scheduler initialized to restart space every 1 hour.")
|
282 |
+
scheduler.start()
|
|