"""Gradio app that runs user-submitted Python against a task's tool code.

The app loads tool implementations from a Hugging Face dataset, exposes a
small code-execution UI, and schedules a periodic restart of the Space.
"""

import io
import sys
import logging
import multiprocessing
import os
import pickle
import threading
import time
from collections import Counter, defaultdict
from concurrent.futures import ProcessPoolExecutor, as_completed, wait, FIRST_COMPLETED
from datetime import datetime
from typing import Any, Dict, List, Tuple
from warnings import warn
from contextlib import redirect_stdout, redirect_stderr

import gradio as gr
import numpy as np
from huggingface_hub import HfApi
from apscheduler.schedulers.background import BackgroundScheduler
from datasets import load_dataset

from bigcodebench.data.utils import CACHE_DIR
from bigcodebench.eval import PASS, compatible_eval_result, estimate_pass_at_k, untrusted_check
from bigcodebench.gen.util import trusted_check

REPO_ID = "bigcode/bigcodebench-interaction"
HF_TOKEN = os.environ.get("HF_TOKEN", None)
API = HfApi(token=HF_TOKEN)
Result = Tuple[str, List[bool]]

dataset = load_dataset("bigcode/bigcodebench-tool")
# Maps a task identifier to the source code that is prepended to user code.
# NOTE(review): `load_dataset` without a `split` usually returns a mapping of
# split name -> Dataset, in which case `.items()` iterates splits rather than
# individual tasks -- confirm against the dataset layout.
tasks = {
    _id: task["mixed_tool_implementation"]
    for _id, task in dataset.items()
}


def run_code(code: str, _id: str) -> str:
    """Execute the selected task's tool code followed by *code*.

    Args:
        code: Python source entered by the user.
        _id: Key into ``tasks`` selecting the tool implementation to prepend.

    Returns:
        Everything written to stdout, followed by an ``--- Errors ---``
        section holding stderr when stderr is non-empty. If execution raises
        an ``Exception``, returns ``"Error: <message>"`` instead.

    Raises:
        KeyError: If ``_id`` is not a key of ``tasks``.
    """
    stdout_buffer = io.StringIO()
    stderr_buffer = io.StringIO()
    pre_code = tasks[_id]

    # Use ONE fresh dict as the execution namespace. Calling exec() inside a
    # function without explicit globals gives the code separate globals and
    # locals, so functions defined by the executed code cannot see the
    # imports or helpers defined next to them (NameError at call time). A
    # private namespace also keeps user code from clobbering this module.
    namespace: Dict[str, Any] = {"__name__": "__main__"}

    # SECURITY: this executes arbitrary user-supplied code inside the server
    # process with no sandboxing. Only deploy in an isolated environment.
    # NOTE(review): redirect_stdout/redirect_stderr replace the process-wide
    # streams, so output of concurrently running requests may be mixed.
    with redirect_stdout(stdout_buffer), redirect_stderr(stderr_buffer):
        try:
            # The explicit newline prevents the last line of the tool code
            # from fusing with the first line of the user's code.
            exec(pre_code + "\n" + code, namespace)
        except Exception as e:
            # Report execution failures to the user instead of raising.
            return f"Error: {str(e)}"

    result = stdout_buffer.getvalue()
    errors = stderr_buffer.getvalue()
    if errors:
        result += "\n--- Errors ---\n" + errors
    return result


# Build the Gradio UI: a code editor plus a task selector, one text output.
interface = gr.Interface(
    fn=run_code,
    inputs=[
        gr.Code(label="Python Code", language="python"),
        gr.Dropdown(label="Task", choices=list(tasks.keys())),
    ],
    outputs=[
        gr.Textbox(label="Output")
    ],
)
interface.queue(default_concurrency_limit=None)


def restart_space():
    """Ask the Hugging Face API to restart the Space named by ``REPO_ID``.

    Failures are logged and swallowed so that an error in this scheduled job
    does not propagate to the scheduler.
    """
    logging.info(f"Restarting space with repo ID: {REPO_ID}")
    try:
        API.restart_space(repo_id=REPO_ID, token=HF_TOKEN)
        logging.info("Space restarted successfully.")
    except Exception as e:
        logging.error(f"Failed to restart space: {e}")


# Restart the Space every 5 hours from a background scheduler thread.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", hours=5)
scheduler.start()

interface.launch(show_error=True)