terryyz committed
Commit: 36fb388
Parent(s): fb47f55

Update app.py

Files changed (1):
  app.py  +44 -20
app.py CHANGED
@@ -4,6 +4,7 @@ import sys
 import os
 import threading
 import time
+import uuid
 
 class Logger:
     def __init__(self, filename):
@@ -22,9 +23,6 @@ class Logger:
     def isatty(self):
         return False
 
-log_file = "bigcodebench_output.log"
-sys.stdout = Logger(log_file)
-
 default_command = "bigcodebench.evaluate"
 is_running = False
 
@@ -62,29 +60,46 @@ def generate_command(
     return " ".join(command)
 
 
+
 def run_bigcodebench(command):
     global is_running
     is_running = True
-    print(f"Executing command: {command}")
+    yield f"Executing command: {command}\n"
 
     process = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
 
     for line in process.stdout:
-        print(line, end='')
+        yield line
 
     process.wait()
 
     if process.returncode != 0:
-        print(f"Error: Command exited with status {process.returncode}")
+        yield f"Error: Command exited with status {process.returncode}\n"
 
     cleanup_command = "pids=$(ps -u $(id -u) -o pid,comm | grep 'bigcodebench' | awk '{print $1}'); if [ -n \"$pids\" ]; then echo $pids | xargs -r kill; fi; rm -rf /tmp/*"
     subprocess.run(cleanup_command, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
 
     is_running = False
+    yield "Evaluation completed.\n"
+
+def stream_logs(command):
+    global is_running
+    if is_running:
+        yield "A command is already running. Please wait for it to finish.\n"
+        return
+
+    log_content = []
+    for log_line in run_bigcodebench(command):
+        log_content.append(log_line)
+        yield "".join(log_content)
+
+
 
-def read_logs():
-    with open(log_file, "r") as f:
-        return f.read()
+def read_logs(log_file):
+    if os.path.exists(log_file):
+        with open(log_file, "r") as f:
+            return f.read()
+    return ""
 
 with gr.Blocks() as demo:
     gr.Markdown("# BigCodeBench Evaluator")
@@ -92,24 +107,26 @@ with gr.Blocks() as demo:
     with gr.Row():
         jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
         split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
-        subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="full")
+        subset = gr.Dropdown(choices=["full", "hard"], label="Subset", value="hard")
 
     with gr.Row():
         save_pass_rate = gr.Checkbox(label="Save Pass Rate")
         parallel = gr.Number(label="Parallel (optional)", precision=0)
         min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
-        max_as_limit = gr.Number(label="Max AS Limit", value=128*1024, precision=0)
+        max_as_limit = gr.Number(label="Max AS Limit", value=200*1024, precision=0)
 
     with gr.Row():
-        max_data_limit = gr.Number(label="Max Data Limit", value=4*1024, precision=0)
+        max_data_limit = gr.Number(label="Max Data Limit", value=10*1024, precision=0)
         max_stack_limit = gr.Number(label="Max Stack Limit", value=5, precision=0)
        check_gt_only = gr.Checkbox(label="Check GT Only")
        no_gt = gr.Checkbox(label="No GT")
 
-    command_output = gr.Textbox(label="Command", lines=2, value=default_command, interactive=False)
+    command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
     submit_btn = gr.Button("Run Evaluation")
     log_output = gr.Textbox(label="Execution Logs", lines=10)
 
+    # Hidden component to store the unique log file path
+    session_log_file = gr.State("")
     def update_command(*args):
         return generate_command(*args)
 
@@ -125,13 +142,20 @@ with gr.Blocks() as demo:
     def on_submit(command):
         global is_running
         if is_running:
-            return "A command is already running. Please wait for it to finish."
-        threading.Thread(target=run_bigcodebench, args=(command,), daemon=True).start()
-        return "Evaluation started. Please wait for the logs to update..."
+            yield "A command is already running. Please wait for it to finish."
+            return
+
+        log_accumulator = []
+        for log_line in run_bigcodebench(command):
+            log_accumulator.append(log_line)
+            yield "\n".join(log_accumulator)
 
-    submit_btn.click(on_submit, inputs=[command_output], outputs=[log_output])
+    submit_btn.click(stream_logs, inputs=[command_output], outputs=[log_output])
 
-    demo.load(read_logs, None, log_output, every=1)
+    # def update_logs(session_log_file):
+    #     return read_logs(session_log_file)
+
+    # demo.load(update_logs, inputs=[session_log_file], outputs=[log_output], every=1)
 
 if __name__ == "__main__":
-    demo.queue(max_size=300).launch(server_name="0.0.0.0", server_port=7860)
+    demo.queue(max_size=300).launch()
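
Note: this change swaps the old print-to-stdout logging plus demo.load(..., every=1) polling for Gradio's generator-based streaming. When the function wired to submit_btn.click is a generator, each yield replaces the contents of the output Textbox, so logs appear incrementally while the subprocess runs (queuing must be enabled, which demo.queue(max_size=300) already provides). Below is a minimal, self-contained sketch of that pattern for illustration only; the component names and the echoed command are placeholders, not taken from this repository.

import subprocess

import gradio as gr


def stream_command(command):
    # Generator click handler: every yield overwrites the bound output
    # Textbox, so the log grows line by line while the process runs.
    lines = [f"$ {command}\n"]
    yield "".join(lines)
    process = subprocess.Popen(
        command.split(),
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    for line in process.stdout:
        lines.append(line)  # lines from process.stdout already end in "\n"
        yield "".join(lines)
    process.wait()
    lines.append(f"[exited with status {process.returncode}]\n")
    yield "".join(lines)


with gr.Blocks() as demo:
    # Placeholder components; the real app wires command_output/log_output.
    cmd = gr.Textbox(label="Command", value="python --version")
    run_btn = gr.Button("Run")
    logs = gr.Textbox(label="Logs", lines=10)
    run_btn.click(stream_command, inputs=[cmd], outputs=[logs])

if __name__ == "__main__":
    demo.queue().launch()

This is also why stream_logs (the handler actually attached to the button) joins accumulated lines with "" while the redefined but no-longer-wired on_submit joins with "\n": lines read from process.stdout already carry their own newline, so the empty-string join avoids double spacing.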