bigcodebench-evaluator-1

Sleeping

App Files Community

terryyz committed on Jul 29

Commit

36fb388

•

1 Parent(s): fb47f55

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -20

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import sys
 import os
 import threading
 import time
 class Logger:
     def __init__(self, filename):
@@ -22,9 +23,6 @@ class Logger:
     def isatty(self):
         return False
-log_file = "bigcodebench_output.log"
-sys.stdout = Logger(log_file)
 default_command = "bigcodebench.evaluate"
 is_running = False
@@ -62,29 +60,46 @@ def generate_command(
     return " ".join(command)
 def run_bigcodebench(command):
     global is_running
     is_running = True
-    print(f"Executing command: {command}")
     process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
     for line in process.stdout:
-        print(line, end='')
     process.wait()
     if process.returncode != 0:
-        print(f"Error: Command exited with status {process.returncode}")
     cleanup_command = "pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n \"$pids\" ]; then echo $pids | xargs -r kill; fi; rm -rf /tmp/*"
     subprocess.run(cleanup_command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     is_running = False
-def read_logs():
-    with open(log_file, "r") as f:
-        return f.read()
 with gr.Blocks() as demo:
     gr.Markdown("# BigCodeBench Evaluator")
@@ -92,24 +107,26 @@ with gr.Blocks() as demo:
     with gr.Row():
         jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
         split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
-        subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="full")
     with gr.Row():
         save_pass_rate = gr.Checkbox(label="Save Pass Rate")
         parallel = gr.Number(label="Parallel (optional)", precision=0)
         min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
-        max_as_limit = gr.Number(label="Max AS Limit", value=128*1024, precision=0)
     with gr.Row():
-        max_data_limit = gr.Number(label="Max Data Limit", value=4*1024, precision=0)
         max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
         check_gt_only = gr.Checkbox(label="Check GT Only")
         no_gt = gr.Checkbox(label="No GT")
-    command_output = gr.Textbox(label="Command", lines=2, value=default_command, interactive=False)
     submit_btn = gr.Button("Run Evaluation")
     log_output = gr.Textbox(label="Execution Logs", lines=10)
     def update_command(*args):
         return generate_command(*args)
@@ -125,13 +142,20 @@ with gr.Blocks() as demo:
     def on_submit(command):
         global is_running
         if is_running:
-            return "A command is already running. Please wait for it to finish."
-        threading.Thread(target=run_bigcodebench, args=(command,), daemon=True).start()
-        return "Evaluation started. Please wait for the logs to update..."
-    submit_btn.click(on_submit, inputs=[command_output], outputs=[log_output])
-    demo.load(read_logs, None, log_output, every=1)
 if __name__ == "__main__":
-    demo.queue(max_size=300).launch(server_name="0.0.0.0", server_port=7860)

 import os
 import threading
 import time
+import uuid
 class Logger:
     def __init__(self, filename):
     def isatty(self):
         return False
 default_command = "bigcodebench.evaluate"
 is_running = False
     return " ".join(command)
 def run_bigcodebench(command):
     global is_running
     is_running = True
+    yield f"Executing command: {command}\n"
     process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
     for line in process.stdout:
+        yield line
     process.wait()
     if process.returncode != 0:
+        yield f"Error: Command exited with status {process.returncode}\n"
     cleanup_command = "pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n \"$pids\" ]; then echo $pids | xargs -r kill; fi; rm -rf /tmp/*"
     subprocess.run(cleanup_command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
     is_running = False
+    yield "Evaluation completed.\n"
+def stream_logs(command):
+    global is_running
+    if is_running:
+        yield "A command is already running. Please wait for it to finish.\n"
+        return
+    log_content = []
+    for log_line in run_bigcodebench(command):
+        log_content.append(log_line)
+        yield "".join(log_content)
+def read_logs(log_file):
+    if os.path.exists(log_file):
+        with open(log_file, "r") as f:
+            return f.read()
+    return ""
 with gr.Blocks() as demo:
     gr.Markdown("# BigCodeBench Evaluator")
     with gr.Row():
         jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
         split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
+        subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="hard")
     with gr.Row():
         save_pass_rate = gr.Checkbox(label="Save Pass Rate")
         parallel = gr.Number(label="Parallel (optional)", precision=0)
         min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
+        max_as_limit = gr.Number(label="Max AS Limit", value=200*1024, precision=0)
     with gr.Row():
+        max_data_limit = gr.Number(label="Max Data Limit", value=10*1024, precision=0)
         max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
         check_gt_only = gr.Checkbox(label="Check GT Only")
         no_gt = gr.Checkbox(label="No GT")
+    command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
     submit_btn = gr.Button("Run Evaluation")
     log_output = gr.Textbox(label="Execution Logs", lines=10)
+    # Hidden component to store the unique log file path
+    session_log_file = gr.State("")
     def update_command(*args):
         return generate_command(*args)
     def on_submit(command):
         global is_running
         if is_running:
+            yield "A command is already running. Please wait for it to finish."
+            return
+        log_accumulator = []
+        for log_line in run_bigcodebench(command):
+            log_accumulator.append(log_line)
+            yield "\n".join(log_accumulator)
+    submit_btn.click(stream_logs, inputs=[command_output], outputs=[log_output])
+    # def update_logs(session_log_file):
+    #     return read_logs(session_log_file)
+    # demo.load(update_logs, inputs=[session_log_file], outputs=[log_output], every=1)
 if __name__ == "__main__":
+    demo.queue(max_size=300).launch()