Terry Zhuo committed on
Commit
5729010
•
1 Parent(s): b64230f
Files changed (1) hide show
  1. app.py +52 -52
app.py CHANGED
@@ -512,66 +512,66 @@ with main_block as demo:
512
  with gr.TabItem("🚀 Request", id=4):
513
  gr.Markdown(SUBMISSION_TEXT_3)
514
 
515
- # with gr.TabItem("🛠️ Execute", id=5):
516
- # gr.Markdown("# BigCodeBench Evaluator")
517
 
518
- # with gr.Row():
519
- # jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
520
- # split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
521
- # subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")
522
 
523
- # with gr.Row():
524
- # parallel = gr.Number(label="Parallel (optional)", precision=0)
525
- # min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
526
- # max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
527
 
528
- # with gr.Row():
529
- # max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
530
- # max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
531
- # check_gt_only = gr.Checkbox(label="Check GT Only")
532
- # no_gt = gr.Checkbox(label="No GT")
533
 
534
- # command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
535
- # with gr.Row():
536
- # submit_btn = gr.Button("Run Evaluation")
537
- # download_btn = gr.DownloadButton(label="Download Result")
538
- # log_output = gr.Textbox(label="Execution Logs", lines=20)
539
 
540
- # input_components = [
541
- # jsonl_file, split, subset, parallel,
542
- # min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
543
- # check_gt_only, no_gt
544
- # ]
545
 
546
- # for component in input_components:
547
- # component.change(generate_command, inputs=input_components, outputs=command_output)
548
 
549
 
550
- # def start_evaluation(command, jsonl_file, subset, split):
551
- # extra = subset + "_" if subset != "full" else ""
552
- # if jsonl_file is not None:
553
- # result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
554
- # else:
555
- # result_path = None
556
-
557
- # for log in stream_logs(command, jsonl_file):
558
- # if jsonl_file is not None:
559
- # yield log, gr.update(value=result_path, label=result_path), gr.update()
560
- # else:
561
- # yield log, gr.update(), gr.update()
562
- # is_running = False
563
- # result_file = find_result_file()
564
- # if result_file:
565
- # return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
566
- # # gr.Button(visible=False)#,
567
- # # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
568
- # else:
569
- # return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
570
- # # gr.Button("Run Evaluation", visible=True),
571
- # # gr.DownloadButton(visible=False))
572
- # submit_btn.click(start_evaluation,
573
- # inputs=[command_output, jsonl_file, subset, split],
574
- # outputs=[log_output, download_btn])
575
 
576
  with gr.Row():
577
  with gr.Accordion("📙 Citation", open=False):
 
512
  with gr.TabItem("🚀 Request", id=4):
513
  gr.Markdown(SUBMISSION_TEXT_3)
514
 
515
+ with gr.TabItem("🛠️ Execute", id=5):
516
+ gr.Markdown("# BigCodeBench Evaluator")
517
 
518
+ with gr.Row():
519
+ jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
520
+ split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
521
+ subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")
522
 
523
+ with gr.Row():
524
+ parallel = gr.Number(label="Parallel (optional)", precision=0)
525
+ min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
526
+ max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
527
 
528
+ with gr.Row():
529
+ max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
530
+ max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
531
+ check_gt_only = gr.Checkbox(label="Check GT Only")
532
+ no_gt = gr.Checkbox(label="No GT")
533
 
534
+ command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
535
+ with gr.Row():
536
+ submit_btn = gr.Button("Run Evaluation")
537
+ download_btn = gr.DownloadButton(label="Download Result")
538
+ log_output = gr.Textbox(label="Execution Logs", lines=20)
539
 
540
+ input_components = [
541
+ jsonl_file, split, subset, parallel,
542
+ min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
543
+ check_gt_only, no_gt
544
+ ]
545
 
546
+ for component in input_components:
547
+ component.change(generate_command, inputs=input_components, outputs=command_output)
548
 
549
 
550
+ def start_evaluation(command, jsonl_file, subset, split):
551
+ extra = subset + "_" if subset != "full" else ""
552
+ if jsonl_file is not None:
553
+ result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
554
+ else:
555
+ result_path = None
556
+
557
+ for log in stream_logs(command, jsonl_file):
558
+ if jsonl_file is not None:
559
+ yield log, gr.update(value=result_path, label=result_path), gr.update()
560
+ else:
561
+ yield log, gr.update(), gr.update()
562
+ is_running = False
563
+ result_file = find_result_file()
564
+ if result_file:
565
+ return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
566
+ # gr.Button(visible=False)#,
567
+ # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
568
+ else:
569
+ return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
570
+ # gr.Button("Run Evaluation", visible=True),
571
+ # gr.DownloadButton(visible=False))
572
+ submit_btn.click(start_evaluation,
573
+ inputs=[command_output, jsonl_file, subset, split],
574
+ outputs=[log_output, download_btn])
575
 
576
  with gr.Row():
577
  with gr.Accordion("📙 Citation", open=False):