Spaces:
Build error
Build error
root
committed on
Commit
·
58afb94
1
Parent(s):
2f4f14b
vbench2 filter
Browse files
- app.py +73 -29
- constants.py +7 -2
app.py
CHANGED
@@ -18,6 +18,12 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
18 |
global data_component, filter_component
|
19 |
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def upload_file(files):
|
22 |
file_paths = [file.name for file in files]
|
23 |
return file_paths
|
@@ -384,12 +390,6 @@ def get_final_score_quality(df, selected_columns):
|
|
384 |
return df
|
385 |
|
386 |
def get_final_score2(df, selected_columns):
|
387 |
-
category_to_dimension = {}
|
388 |
-
|
389 |
-
for key, value in VBENCH2_DIM2CAT.items():
|
390 |
-
if value not in category_to_dimension:
|
391 |
-
category_to_dimension[value] = []
|
392 |
-
category_to_dimension[value].append(key)
|
393 |
score_names = []
|
394 |
for cur_score in category_to_dimension:
|
395 |
score_name = f"{cur_score} Score"
|
@@ -497,11 +497,11 @@ def get_all_df_long(selected_columns, dir=LONG_DIR):
|
|
497 |
df = df.sort_values(by="Selected Score", ascending=False)
|
498 |
return df
|
499 |
|
500 |
-
def get_all_df2(dir=VBENCH2_DIR):
|
501 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
502 |
submission_repo.git_pull()
|
503 |
df = pd.read_csv(dir)
|
504 |
-
df = get_final_score2(df,
|
505 |
df = df.sort_values(by="Total Score", ascending=False)
|
506 |
return df
|
507 |
|
@@ -621,13 +621,13 @@ def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample
|
|
621 |
visible=True,
|
622 |
)
|
623 |
return filter_component#.value
|
|
|
624 |
|
625 |
-
|
626 |
-
|
627 |
-
updated_data = get_all_df2(VBENCH2_DIR)
|
628 |
print(updated_data)
|
629 |
if vbench_team_sample:
|
630 |
-
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
631 |
if vbench_team_eval:
|
632 |
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
633 |
|
@@ -642,7 +642,34 @@ def on_filter_model_size_method_change_2(vbench_team_sample, vbench_team_eval=Fa
|
|
642 |
interactive=False,
|
643 |
visible=True,
|
644 |
)
|
645 |
-
return filter_component
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
646 |
|
647 |
block = gr.Blocks()
|
648 |
|
@@ -699,7 +726,7 @@ with block:
|
|
699 |
datatype=DATA_TITILE_TYPE,
|
700 |
interactive=False,
|
701 |
visible=True,
|
702 |
-
height=700,
|
703 |
)
|
704 |
|
705 |
choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
@@ -724,18 +751,29 @@ with block:
|
|
724 |
TABLE_INTRODUCTION
|
725 |
)
|
726 |
with gr.Row():
|
727 |
-
|
728 |
-
|
729 |
-
|
730 |
-
value=False,
|
731 |
-
interactive=True
|
732 |
-
)
|
733 |
-
vbench_validate_filter_2 = gr.Checkbox(
|
734 |
-
label="Evaluated by VBench Team (Uncheck to view all submissions)",
|
735 |
-
value=True,
|
736 |
interactive=True
|
737 |
)
|
738 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
739 |
|
740 |
data_component_2 = gr.components.Dataframe(
|
741 |
value=get_baseline_df_2,
|
@@ -744,11 +782,17 @@ with block:
|
|
744 |
datatype=VBENCH2_TITLE_TYPE,
|
745 |
interactive=False,
|
746 |
visible=True,
|
747 |
-
height=700,
|
748 |
)
|
749 |
-
vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
750 |
-
vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
751 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
752 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
753 |
with gr.Accordion("INSTRUCTION", open=False):
|
754 |
citation_button = gr.Textbox(
|
@@ -868,7 +912,7 @@ with block:
|
|
868 |
datatype=DATA_TITILE_TYPE,
|
869 |
interactive=False,
|
870 |
visible=True,
|
871 |
-
height=700,
|
872 |
)
|
873 |
|
874 |
choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
|
|
|
18 |
global data_component, filter_component
|
19 |
|
20 |
|
21 |
+
category_to_dimension = {}
|
22 |
+
for key, value in VBENCH2_DIM2CAT.items():
|
23 |
+
if value not in category_to_dimension:
|
24 |
+
category_to_dimension[value] = []
|
25 |
+
category_to_dimension[value].append(key)
|
26 |
+
|
27 |
def upload_file(files):
|
28 |
file_paths = [file.name for file in files]
|
29 |
return file_paths
|
|
|
390 |
return df
|
391 |
|
392 |
def get_final_score2(df, selected_columns):
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
score_names = []
|
394 |
for cur_score in category_to_dimension:
|
395 |
score_name = f"{cur_score} Score"
|
|
|
497 |
df = df.sort_values(by="Selected Score", ascending=False)
|
498 |
return df
|
499 |
|
500 |
+
def get_all_df2(selected_columns, dir=VBENCH2_DIR):
|
501 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
502 |
submission_repo.git_pull()
|
503 |
df = pd.read_csv(dir)
|
504 |
+
df = get_final_score2(df, selected_columns)
|
505 |
df = df.sort_values(by="Total Score", ascending=False)
|
506 |
return df
|
507 |
|
|
|
621 |
visible=True,
|
622 |
)
|
623 |
return filter_component#.value
|
624 |
+
|
625 |
|
626 |
+
def on_filter_model_size_method_change_2(selected_columns=TASK_INFO_2, vbench_team_sample=False, vbench_team_eval=False):
|
627 |
+
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
|
|
628 |
print(updated_data)
|
629 |
if vbench_team_sample:
|
630 |
+
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
631 |
if vbench_team_eval:
|
632 |
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
633 |
|
|
|
642 |
interactive=False,
|
643 |
visible=True,
|
644 |
)
|
645 |
+
return filter_component
|
646 |
+
|
647 |
+
def on_filter_model_size_method_score_change_2(select_score, vbench_team_sample=False, vbench_team_eval=False):
|
648 |
+
selected_columns = category_to_dimension[select_score]
|
649 |
+
print(select_score,"===>",selected_columns)
|
650 |
+
updated_data = get_all_df2(selected_columns, VBENCH2_DIR)
|
651 |
+
print(updated_data)
|
652 |
+
if vbench_team_sample:
|
653 |
+
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
654 |
+
if vbench_team_eval:
|
655 |
+
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
656 |
+
|
657 |
+
present_columns = VBENCH2_MODEL_INFO_DEFAULT + [f"{select_score} Score"] + selected_columns
|
658 |
+
updated_headers = present_columns
|
659 |
+
updated_data = updated_data[present_columns]
|
660 |
+
updated_data = updated_data.sort_values(by=f"{select_score} Score", ascending=False)
|
661 |
+
updated_data = convert_scores_to_percentage(updated_data)
|
662 |
+
update_datatype = [VBENCH2_TITLE_TYPE[COLUMN_NAMES_2.index(x)] for x in updated_headers]
|
663 |
+
print(updated_data)
|
664 |
+
filter_component = gr.components.Dataframe(
|
665 |
+
value=updated_data,
|
666 |
+
headers=updated_headers,
|
667 |
+
type="pandas",
|
668 |
+
datatype=update_datatype,
|
669 |
+
interactive=False,
|
670 |
+
visible=True,
|
671 |
+
)
|
672 |
+
return filter_component, gr.update(value=selected_columns)
|
673 |
|
674 |
block = gr.Blocks()
|
675 |
|
|
|
726 |
datatype=DATA_TITILE_TYPE,
|
727 |
interactive=False,
|
728 |
visible=True,
|
729 |
+
# height=700,
|
730 |
)
|
731 |
|
732 |
choosen_q.click(choose_all_quailty, inputs=None, outputs=[checkbox_group]).then(fn=on_filter_model_size_method_change, inputs=[ checkbox_group, vbench_team_filter,vbench_validate_filter], outputs=data_component)
|
|
|
751 |
TABLE_INTRODUCTION
|
752 |
)
|
753 |
with gr.Row():
|
754 |
+
vbench_team_filter_2 = gr.Checkbox(
|
755 |
+
label="Sampled by VBench Team (Uncheck to view all submissions)",
|
756 |
+
value=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
757 |
interactive=True
|
758 |
)
|
759 |
+
vbench_validate_filter_2 = gr.Checkbox(
|
760 |
+
label="Evaluated by VBench Team (Uncheck to view all submissions)",
|
761 |
+
value=True,
|
762 |
+
interactive=True
|
763 |
+
)
|
764 |
+
with gr.Row():
|
765 |
+
vbench2_creativity_button = gr.Button("Show Creativity Score")
|
766 |
+
vbench2_commonsense_button = gr.Button("Show Commonsense Score")
|
767 |
+
vbench2_control_button = gr.Button("Show Controllability Score")
|
768 |
+
vbench2_human_button = gr.Button("Show Human Fidelity Score")
|
769 |
+
vbench2_physics_button = gr.Button("Show Physics Score")
|
770 |
+
with gr.Row():
|
771 |
+
vbench2_checkgroup = gr.CheckboxGroup(
|
772 |
+
choices=TASK_INFO_2,
|
773 |
+
value=TASK_INFO_2,
|
774 |
+
label="Evaluation Dimension",
|
775 |
+
interactive=True,
|
776 |
+
)
|
777 |
|
778 |
data_component_2 = gr.components.Dataframe(
|
779 |
value=get_baseline_df_2,
|
|
|
782 |
datatype=VBENCH2_TITLE_TYPE,
|
783 |
interactive=False,
|
784 |
visible=True,
|
785 |
+
# height=700,
|
786 |
)
|
787 |
+
vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
788 |
+
vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
789 |
+
# vbench2_checkgroup.change(fn=on_filter_model_size_method_change_2, inputs=[vbench2_checkgroup, vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
790 |
+
vbench2_creativity_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Creativity"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
791 |
+
vbench2_commonsense_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Commonsense"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
792 |
+
vbench2_control_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Controllability"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
793 |
+
vbench2_human_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Human Fidelity"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
794 |
+
vbench2_physics_button.click(fn=on_filter_model_size_method_score_change_2, inputs=[gr.State("Physics"), vbench_team_filter_2, vbench_validate_filter], outputs=[data_component_2, vbench2_checkgroup])
|
795 |
+
|
796 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
797 |
with gr.Accordion("INSTRUCTION", open=False):
|
798 |
citation_button = gr.Textbox(
|
|
|
912 |
datatype=DATA_TITILE_TYPE,
|
913 |
interactive=False,
|
914 |
visible=True,
|
915 |
+
# height=700,
|
916 |
)
|
917 |
|
918 |
choosen_q_long.click(choose_all_quailty, inputs=None, outputs=[checkbox_group_long]).then(fn=on_filter_model_size_method_change_long, inputs=[ checkbox_group_long, vbench_team_filter_long, vbench_validate_filter_long], outputs=data_component)
|
constants.py
CHANGED
@@ -50,13 +50,16 @@ TASK_INFO = [
|
|
50 |
]
|
51 |
|
52 |
|
53 |
-
|
54 |
"Model (alphabetical order)",
|
55 |
"Sampled by",
|
56 |
"Evaluated by",
|
57 |
"Accessibility",
|
58 |
"Date",
|
59 |
-
"Total Score"
|
|
|
|
|
|
|
60 |
'Creativity Score',
|
61 |
'Commonsense Score',
|
62 |
'Controllability Score',
|
@@ -64,6 +67,8 @@ MODEL_INFO_2 = [
|
|
64 |
'Physics Score'
|
65 |
]
|
66 |
|
|
|
|
|
67 |
TASK_INFO_2 = [
|
68 |
"Human Anatomy",
|
69 |
"Human Clothes",
|
|
|
50 |
]
|
51 |
|
52 |
|
53 |
+
VBENCH2_MODEL_INFO_DEFAULT = [
|
54 |
"Model (alphabetical order)",
|
55 |
"Sampled by",
|
56 |
"Evaluated by",
|
57 |
"Accessibility",
|
58 |
"Date",
|
59 |
+
"Total Score"
|
60 |
+
]
|
61 |
+
|
62 |
+
VBENCH2_DIM_DEFAULT_INFO = [
|
63 |
'Creativity Score',
|
64 |
'Commonsense Score',
|
65 |
'Controllability Score',
|
|
|
67 |
'Physics Score'
|
68 |
]
|
69 |
|
70 |
+
MODEL_INFO_2 = VBENCH2_MODEL_INFO_DEFAULT + VBENCH2_DIM_DEFAULT_INFO
|
71 |
+
|
72 |
TASK_INFO_2 = [
|
73 |
"Human Anatomy",
|
74 |
"Human Clothes",
|