Terry Zhuo committed on
Commit
1a5b015
·
1 Parent(s): 51ed153
Files changed (1) hide show
  1. app.py +52 -52
app.py CHANGED
@@ -512,66 +512,66 @@ with main_block as demo:
512
  with gr.TabItem("🚀 Request", id=4):
513
  gr.Markdown(SUBMISSION_TEXT_3)
514
 
515
- with gr.TabItem("🛠️ Execute", id=5):
516
- gr.Markdown("# BigCodeBench Evaluator")
517
 
518
- with gr.Row():
519
- jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
520
- split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
521
- subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")
522
 
523
- with gr.Row():
524
- parallel = gr.Number(label="Parallel (optional)", precision=0)
525
- min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
526
- max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
527
 
528
- with gr.Row():
529
- max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
530
- max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
531
- check_gt_only = gr.Checkbox(label="Check GT Only")
532
- no_gt = gr.Checkbox(label="No GT")
533
 
534
- command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
535
- with gr.Row():
536
- submit_btn = gr.Button("Run Evaluation")
537
- download_btn = gr.DownloadButton(label="Download Result")
538
- log_output = gr.Textbox(label="Execution Logs", lines=20)
539
 
540
- input_components = [
541
- jsonl_file, split, subset, parallel,
542
- min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
543
- check_gt_only, no_gt
544
- ]
545
 
546
- for component in input_components:
547
- component.change(generate_command, inputs=input_components, outputs=command_output)
548
 
549
 
550
- def start_evaluation(command, jsonl_file, subset, split):
551
- extra = subset + "_" if subset != "full" else ""
552
- if jsonl_file is not None:
553
- result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
554
- else:
555
- result_path = None
556
-
557
- for log in stream_logs(command, jsonl_file):
558
- if jsonl_file is not None:
559
- yield log, gr.update(value=result_path, label=result_path), gr.update()
560
- else:
561
- yield log, gr.update(), gr.update()
562
- is_running = False
563
- result_file = find_result_file()
564
- if result_file:
565
- return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
566
- # gr.Button(visible=False)#,
567
- # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
568
- else:
569
- return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
570
- # gr.Button("Run Evaluation", visible=True),
571
- # gr.DownloadButton(visible=False))
572
- submit_btn.click(start_evaluation,
573
- inputs=[command_output, jsonl_file, subset, split],
574
- outputs=[log_output, download_btn])
575
 
576
  with gr.Row():
577
  with gr.Accordion("📙 Citation", open=False):
 
512
  with gr.TabItem("🚀 Request", id=4):
513
  gr.Markdown(SUBMISSION_TEXT_3)
514
 
515
+ # with gr.TabItem("🛠️ Execute", id=5):
516
+ # gr.Markdown("# BigCodeBench Evaluator")
517
 
518
+ # with gr.Row():
519
+ # jsonl_file = gr.File(label="Upload JSONL file", file_types=[".jsonl"])
520
+ # split = gr.Dropdown(choices=["complete", "instruct"], label="Split", value="complete")
521
+ # subset = gr.Dropdown(choices=["hard"], label="Subset", value="hard")
522
 
523
+ # with gr.Row():
524
+ # parallel = gr.Number(label="Parallel (optional)", precision=0)
525
+ # min_time_limit = gr.Number(label="Min Time Limit", value=1, precision=1)
526
+ # max_as_limit = gr.Number(label="Max AS Limit", value=25*1024, precision=0)
527
 
528
+ # with gr.Row():
529
+ # max_data_limit = gr.Number(label="Max Data Limit", value=25*1024, precision=0)
530
+ # max_stack_limit = gr.Number(label="Max Stack Limit", value=10, precision=0)
531
+ # check_gt_only = gr.Checkbox(label="Check GT Only")
532
+ # no_gt = gr.Checkbox(label="No GT")
533
 
534
+ # command_output = gr.Textbox(label="Command", value=default_command, interactive=False)
535
+ # with gr.Row():
536
+ # submit_btn = gr.Button("Run Evaluation")
537
+ # download_btn = gr.DownloadButton(label="Download Result")
538
+ # log_output = gr.Textbox(label="Execution Logs", lines=20)
539
 
540
+ # input_components = [
541
+ # jsonl_file, split, subset, parallel,
542
+ # min_time_limit, max_as_limit, max_data_limit, max_stack_limit,
543
+ # check_gt_only, no_gt
544
+ # ]
545
 
546
+ # for component in input_components:
547
+ # component.change(generate_command, inputs=input_components, outputs=command_output)
548
 
549
 
550
+ # def start_evaluation(command, jsonl_file, subset, split):
551
+ # extra = subset + "_" if subset != "full" else ""
552
+ # if jsonl_file is not None:
553
+ # result_path = os.path.basename(jsonl_file.name).replace(".jsonl", f"_{extra}eval_results.json")
554
+ # else:
555
+ # result_path = None
556
+
557
+ # for log in stream_logs(command, jsonl_file):
558
+ # if jsonl_file is not None:
559
+ # yield log, gr.update(value=result_path, label=result_path), gr.update()
560
+ # else:
561
+ # yield log, gr.update(), gr.update()
562
+ # is_running = False
563
+ # result_file = find_result_file()
564
+ # if result_file:
565
+ # return gr.update(label="Evaluation completed. Result file found."), gr.update(value=result_file)
566
+ # # gr.Button(visible=False)#,
567
+ # # gr.DownloadButton(label="Download Result", value=result_file, visible=True))
568
+ # else:
569
+ # return gr.update(label="Evaluation completed. No result file found."), gr.update(value=result_path)
570
+ # # gr.Button("Run Evaluation", visible=True),
571
+ # # gr.DownloadButton(visible=False))
572
+ # submit_btn.click(start_evaluation,
573
+ # inputs=[command_output, jsonl_file, subset, split],
574
+ # outputs=[log_output, download_btn])
575
 
576
  with gr.Row():
577
  with gr.Accordion("📙 Citation", open=False):