terryyz committed on
Commit
4211404
1 Parent(s): f9ea51a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -35
app.py CHANGED
@@ -13,7 +13,7 @@ from warnings import warn
13
  import gc
14
 
15
  import numpy as np
16
-
17
  from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
18
  from bigcodebench.data.utils import CACHE_DIR
19
  from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
@@ -22,7 +22,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
22
 
23
  REPO_ID = "bigcode/bigcodebench-evaluator"
24
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
25
-
26
  Result = Tuple[str, List[bool]]
27
 
28
 
@@ -230,30 +230,30 @@ def evaluate(
230
  return results, pass_at_k
231
 
232
 
233
- def run_gradio():
234
- interface = gr.Interface(
235
- fn=evaluate,
236
- inputs=[
237
- gr.Dropdown(["complete", "instruct"], label="BigCodeBench Split"),
238
- gr.Dropdown(["full", "hard"], label="BigCodeBench Subset"),
239
- gr.File(label="Samples Path (.jsonl)"),
240
- gr.Textbox(label="Pass k Values (comma-separated)", value="1,5,10"),
241
- gr.Slider(-1, multiprocessing.cpu_count(), step=1, label="Parallel Workers", value=-1),
242
- gr.Slider(0.1, 10, step=0.1, label="Min Time Limit", value=1),
243
- gr.Slider(1, 100 * 1024, step=1024, label="Max AS Limit", value=30 * 1024),
244
- gr.Slider(1, 100 * 1024, step=1024, label="Max Data Limit", value=30 * 1024),
245
- gr.Slider(1, 100, step=1, label="Max Stack Limit", value=10),
246
- gr.Checkbox(label="Check GT Only"),
247
- gr.Checkbox(label="No GT"),
248
- ],
249
- outputs=[
250
- gr.JSON(label="Results"),
251
- gr.JSON(label="Eval Results"),
252
- ],
253
- # concurrency_limit=None
254
- )
255
- interface.queue(default_concurrency_limit=None)
256
- interface.launch(show_error=True)
257
 
258
 
259
  def preload_gt():
@@ -272,12 +272,11 @@ def restart_space():
272
  logging.error(f"Failed to restart space: {e}")
273
 
274
 
275
- if __name__ == "__main__":
276
- preload_gt()
277
- run_gradio()
278
-
279
- scheduler = BackgroundScheduler()
280
- scheduler.add_job(restart_space, "interval", hours=1) # Restart every 1h
281
- logging.info("Scheduler initialized to restart space every 1 hour.")
282
- scheduler.start()
283
- # evaluate("complete", "hard", "meta-llama--Llama-3.2-3B-Instruct--bigcodebench-instruct--vllm-0-1.jsonl")
 
13
  import gc
14
 
15
  import numpy as np
16
+ from huggingface_hub import HfApi
17
  from bigcodebench.data import get_bigcodebench, get_bigcodebench_hash, load_solutions
18
  from bigcodebench.data.utils import CACHE_DIR
19
  from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
 
22
 
23
  REPO_ID = "bigcode/bigcodebench-evaluator"
24
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
25
+ API = HfApi(token=HF_TOKEN)
26
  Result = Tuple[str, List[bool]]
27
 
28
 
 
230
  return results, pass_at_k
231
 
232
 
233
+ # def run_gradio():
234
+ interface = gr.Interface(
235
+ fn=evaluate,
236
+ inputs=[
237
+ gr.Dropdown(["complete", "instruct"], label="BigCodeBench Split"),
238
+ gr.Dropdown(["full", "hard"], label="BigCodeBench Subset"),
239
+ gr.File(label="Samples Path (.jsonl)"),
240
+ gr.Textbox(label="Pass k Values (comma-separated)", value="1,5,10"),
241
+ gr.Slider(-1, multiprocessing.cpu_count(), step=1, label="Parallel Workers", value=-1),
242
+ gr.Slider(0.1, 10, step=0.1, label="Min Time Limit", value=1),
243
+ gr.Slider(1, 100 * 1024, step=1024, label="Max AS Limit", value=30 * 1024),
244
+ gr.Slider(1, 100 * 1024, step=1024, label="Max Data Limit", value=30 * 1024),
245
+ gr.Slider(1, 100, step=1, label="Max Stack Limit", value=10),
246
+ gr.Checkbox(label="Check GT Only"),
247
+ gr.Checkbox(label="No GT"),
248
+ ],
249
+ outputs=[
250
+ gr.JSON(label="Results"),
251
+ gr.JSON(label="Eval Results"),
252
+ ],
253
+ # concurrency_limit=None
254
+ )
255
+ interface.queue(default_concurrency_limit=None)
256
+ # interface.launch(show_error=True)
257
 
258
 
259
  def preload_gt():
 
272
  logging.error(f"Failed to restart space: {e}")
273
 
274
 
275
+ # if __name__ == "__main__":
276
+ preload_gt()
277
+ # run_gradio()
278
+
279
+ scheduler = BackgroundScheduler()
280
+ scheduler.add_job(restart_space, "interval", hours=1) # Restart every 1h
281
+ logging.info("Scheduler initialized to restart space every 1 hour.")
282
+ scheduler.start()