root committed on
Commit
58afb94
·
1 Parent(s): 2f4f14b

vbench2 filter

Browse files
Files changed (2) hide show
  1. app.py +73 -29
  2. constants.py +7 -2
app.py CHANGED
@@ -18,6 +18,12 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
18
  global data_component, filter_component
19
 
20
 
 
 
 
 
 
 
21
  def upload_file(files):
22
  file_paths = [file.name for file in files]
23
  return file_paths
@@ -384,12 +390,6 @@ def get_final_score_quality(df, selected_columns):
384
  return df
385
 
386
  def get_final_score2(df, selected_columns):
387
- category_to_dimension = {}
388
-
389
- for key, value in VBENCH2_DIM2CAT.items():
390
- if value not in category_to_dimension:
391
- category_to_dimension[value] = []
392
- category_to_dimension[value].append(key)
393
  score_names = []
394
  for cur_score in category_to_dimension:
395
  score_name = f"{cur_score} Score"
@@ -497,11 +497,11 @@ def get_all_df_long(selected_columns, dir=LONG_DIR):
497
  df = df.sort_values(by="Selected Score", ascending=False)
498
  return df
499
 
500
- def get_all_df2(dir=VBENCH2_DIR):
501
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
502
  submission_repo.git_pull()
503
  df = pd.read_csv(dir)
504
- df = get_final_score2(df, TASK_INFO_2)
505
  df = df.sort_values(by="Total Score", ascending=False)
506
  return df
507
 
@@ -621,13 +621,13 @@ def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample
621
  visible=True,
622
  )
623
  return filter_component#.value
 
624
 
625
-
626
- def on_filter_model_size_method_change_2(vbench_team_sample, vbench_team_eval=False):
627
- updated_data = get_all_df2(VBENCH2_DIR)
628
  print(updated_data)
629
  if vbench_team_sample:
630
- updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
631
  if vbench_team_eval:
632
  updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
633
 
@@ -642,7 +642,34 @@ def on_filter_model_size_method_change_2(vbench_team_sample, vbench_team_eval=Fa
642
  interactive=False,
643
  visible=True,
644
  )
645
- return filter_component#.value
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
 
647
  block = gr.Blocks()
648
 
@@ -699,7 +726,7 @@ with block:
699
  datatype=DATA_TITILE_TYPE,
700
  interactive=False,
701
  visible=True,
702
- height=700,
703
  )
704
 
705
  choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
@@ -724,18 +751,29 @@ with block:
724
  TABLE_INTRODUCTION
725
  )
726
  with gr.Row():
727
- with gr.Column():
728
- vbench_team_filter_2 = gr.Checkbox(
729
- label="Sampled by VBench Team (Uncheck to view all submissions)",
730
- value=False,
731
- interactive=True
732
- )
733
- vbench_validate_filter_2 = gr.Checkbox(
734
- label="Evaluated by VBench Team (Uncheck to view all submissions)",
735
- value=True,
736
  interactive=True
737
  )
738
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
739
 
740
  data_component_2 = gr.components.Dataframe(
741
  value=get_baseline_df_2,
@@ -744,11 +782,17 @@ with block:
744
  datatype=VBENCH2_TITLE_TYPE,
745
  interactive=False,
746
  visible=True,
747
- height=700,
748
  )
749
- vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
750
- vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
751
-
 
 
 
 
 
 
752
  with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
753
  with gr.Accordion("INSTRUCTION", open=False):
754
  citation_button = gr.Textbox(
@@ -868,7 +912,7 @@ with block:
868
  datatype=DATA_TITILE_TYPE,
869
  interactive=False,
870
  visible=True,
871
- height=700,
872
  )
873
 
874
  choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
 
18
  global data_component, filter_component
19
 
20
 
21
+ category_to_dimension = {}
22
+ for key, value in VBENCH2_DIM2CAT.items():
23
+ if value not in category_to_dimension:
24
+ category_to_dimension[value] = []
25
+ category_to_dimension[value].append(key)
26
+
27
  def upload_file(files):
28
  file_paths = [file.name for file in files]
29
  return file_paths
 
390
  return df
391
 
392
  def get_final_score2(df, selected_columns):
 
 
 
 
 
 
393
  score_names = []
394
  for cur_score in category_to_dimension:
395
  score_name = f"{cur_score} Score"
 
497
  df = df.sort_values(by="Selected Score", ascending=False)
498
  return df
499
 
500
+ def get_all_df2(selected_columns, dir=VBENCH2_DIR):
501
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
502
  submission_repo.git_pull()
503
  df = pd.read_csv(dir)
504
+ df = get_final_score2(df, selected_columns)
505
  df = df.sort_values(by="Total Score", ascending=False)
506
  return df
507
 
 
621
  visible=True,
622
  )
623
  return filter_component#.value
624
+
625
 
626
+ def on_filter_model_size_method_change_2(selected_columns=TASK_INFO_2, vbench_team_sample=False, vbench_team_eval=False):
627
+ updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
 
628
  print(updated_data)
629
  if vbench_team_sample:
630
+ updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
631
  if vbench_team_eval:
632
  updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
633
 
 
642
  interactive=False,
643
  visible=True,
644
  )
645
+ return filter_component
646
+
647
+ def on_filter_model_size_method_score_change_2(select_score, vbench_team_sample=False, vbench_team_eval=False):
648
+ selected_columns = category_to_dimension[select_score]
649
+ print(select_score,"===>",selected_columns)
650
+ updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
651
+ print(updated_data)
652
+ if vbench_team_sample:
653
+ updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
654
+ if vbench_team_eval:
655
+ updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
656
+
657
+ present_columns = VBENCH2_MODEL_INFO_DEFAULT + [f"{select_score} Score"] + selected_columns
658
+ updated_headers = present_columns
659
+ updated_data = updated_data[present_columns]
660
+ updated_data = updated_data.sort_values(by=f"{select_score} Score", ascending=False)
661
+ updated_data = convert_scores_to_percentage(updated_data)
662
+ update_datatype = [VBENCH2_TITLE_TYPE[COLUMN_NAMES_2.index(x)] for x in updated_headers]
663
+ print(updated_data)
664
+ filter_component = gr.components.Dataframe(
665
+ value=updated_data,
666
+ headers=updated_headers,
667
+ type="pandas",
668
+ datatype=update_datatype,
669
+ interactive=False,
670
+ visible=True,
671
+ )
672
+ return filter_component, gr.update(value=selected_columns)
673
 
674
  block = gr.Blocks()
675
 
 
726
  datatype=DATA_TITILE_TYPE,
727
  interactive=False,
728
  visible=True,
729
+ # height=700,
730
  )
731
 
732
  choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
 
751
  TABLE_INTRODUCTION
752
  )
753
  with gr.Row():
754
+ vbench_team_filter_2 = gr.Checkbox(
755
+ label="Sampled by VBench Team (Uncheck to view all submissions)",
756
+ value=False,
 
 
 
 
 
 
757
  interactive=True
758
  )
759
+ vbench_validate_filter_2 = gr.Checkbox(
760
+ label="Evaluated by VBench Team (Uncheck to view all submissions)",
761
+ value=True,
762
+ interactive=True
763
+ )
764
+ with gr.Row():
765
+ vbench2_creativity_button = gr.Button("Show Creativity Score")
766
+ vbench2_commonsense_button = gr.Button("Show Commonsense Score")
767
+ vbench2_control_button = gr.Button("Show Controllability Score")
768
+ vbench2_human_button = gr.Button("Show Human Fidelity Score")
769
+ vbench2_physics_button = gr.Button("Show Physics Score")
770
+ with gr.Row():
771
+ vbench2_checkgroup = gr.CheckboxGroup(
772
+ choices=TASK_INFO_2,
773
+ value=TASK_INFO_2,
774
+ label="Evaluation Dimension",
775
+ interactive=True,
776
+ )
777
 
778
  data_component_2 = gr.components.Dataframe(
779
  value=get_baseline_df_2,
 
782
  datatype=VBENCH2_TITLE_TYPE,
783
  interactive=False,
784
  visible=True,
785
+ # height=700,
786
  )
787
+ vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
788
+ vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
789
+ # vbench2_checkgroup.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
790
+ vbench2_creativity_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Creativity"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
791
+ vbench2_commonsense_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Commonsense"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
792
+ vbench2_control_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Controllability"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
793
+ vbench2_human_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Human Fidelity"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
794
+ vbench2_physics_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Physics"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
795
+
796
  with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
797
  with gr.Accordion("INSTRUCTION", open=False):
798
  citation_button = gr.Textbox(
 
912
  datatype=DATA_TITILE_TYPE,
913
  interactive=False,
914
  visible=True,
915
+ # height=700,
916
  )
917
 
918
  choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
constants.py CHANGED
@@ -50,13 +50,16 @@ TASK_INFO = [
50
  ]
51
 
52
 
53
- MODEL_INFO_2 = [
54
  "Model (alphabetical order)",
55
  "Sampled by",
56
  "Evaluated by",
57
  "Accessibility",
58
  "Date",
59
- "Total Score",
 
 
 
60
  'Creativity Score',
61
  'Commonsense Score',
62
  'Controllability Score',
@@ -64,6 +67,8 @@ MODEL_INFO_2 = [
64
  'Physics Score'
65
  ]
66
 
 
 
67
  TASK_INFO_2 = [
68
  "Human Anatomy",
69
  "Human Clothes",
 
50
  ]
51
 
52
 
53
+ VBENCH2_MODEL_INFO_DEFAULT = [
54
  "Model (alphabetical order)",
55
  "Sampled by",
56
  "Evaluated by",
57
  "Accessibility",
58
  "Date",
59
+ "Total Score"
60
+ ]
61
+
62
+ VBENCH2_DIM_DEFAULT_INFO = [
63
  'Creativity Score',
64
  'Commonsense Score',
65
  'Controllability Score',
 
67
  'Physics Score'
68
  ]
69
 
70
+ MODEL_INFO_2 = VBENCH2_MODEL_INFO_DEFAULT + VBENCH2_DIM_DEFAULT_INFO
71
+
72
  TASK_INFO_2 = [
73
  "Human Anatomy",
74
  "Human Clothes",