ynhe committed on
Commit
7340433
1 Parent(s): fa9b35a
Files changed (2) hide show
  1. app.py +128 -5
  2. constants.py +65 -0
app.py CHANGED
@@ -74,6 +74,15 @@ def get_normalized_df(df):
74
  normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val)
75
  return normalize_df
76
 
 
 
 
 
 
 
 
 
 
77
  def calculate_selected_score(df, selected_columns):
78
  # selected_score = df[selected_columns].sum(axis=1)
79
  selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
@@ -91,6 +100,23 @@ def calculate_selected_score(df, selected_columns):
91
  selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
92
  return selected_score.fillna(0.0)
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  def get_final_score(df, selected_columns):
95
  normalize_df = get_normalized_df(df)
96
  #final_score = normalize_df.drop('name', axis=1).sum(axis=1)
@@ -118,6 +144,34 @@ def get_final_score(df, selected_columns):
118
  df.insert(1, 'Selected Score', selected_score)
119
  return df
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  def get_final_score_quality(df, selected_columns):
123
  normalize_df = get_normalized_df(df)
@@ -138,8 +192,8 @@ def get_final_score_quality(df, selected_columns):
138
  return df
139
 
140
  def get_baseline_df():
141
- submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
142
- submission_repo.git_pull()
143
  df = pd.read_csv(CSV_DIR)
144
  df = get_final_score(df, checkbox_group.value)
145
  df = df.sort_values(by="Selected Score", ascending=False)
@@ -149,8 +203,8 @@ def get_baseline_df():
149
  return df
150
 
151
  def get_baseline_df_quality():
152
- submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
153
- submission_repo.git_pull()
154
  df = pd.read_csv(QUALITY_DIR)
155
  df = get_final_score_quality(df, checkbox_group_quality.value)
156
  df = df.sort_values(by="Selected Score", ascending=False)
@@ -159,6 +213,17 @@ def get_baseline_df_quality():
159
  df = convert_scores_to_percentage(df)
160
  return df
161
 
 
 
 
 
 
 
 
 
 
 
 
162
  def get_all_df(selected_columns, dir=CSV_DIR):
163
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
164
  submission_repo.git_pull()
@@ -175,6 +240,13 @@ def get_all_df_quality(selected_columns, dir=QUALITY_DIR):
175
  df = df.sort_values(by="Selected Score", ascending=False)
176
  return df
177
 
 
 
 
 
 
 
 
178
 
179
  def convert_scores_to_percentage(df):
180
  # 对DataFrame中的每一列(除了'name'列)进行操作
@@ -239,6 +311,28 @@ def on_filter_model_size_method_change_quality(selected_columns):
239
  )
240
  return filter_component#.value
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
  block = gr.Blocks()
244
 
@@ -322,8 +416,37 @@ with block:
322
 
323
  checkbox_group_quality.change(fn=on_filter_model_size_method_change_quality, inputs=[checkbox_group_quality], outputs=data_component_quality)
324
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  # table 2
326
- with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=3):
327
  gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
328
 
329
  # table 3
 
74
  normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val)
75
  return normalize_df
76
 
77
def get_normalized_i2v_df(df):
    """Return a min-max normalized copy of an I2V results table.

    Missing values are replaced with 0.0 first. Every column except the
    first is then rescaled to ``(value - Min) / (Max - Min)`` using that
    column's bounds from ``NORMALIZE_DIC_I2V``; a column with no entry there
    raises ``KeyError``. The input frame itself is left untouched.
    """
    scaled = df.copy().fillna(0.0)
    for dimension in scaled.columns[1:]:
        bounds = NORMALIZE_DIC_I2V[dimension]
        lower = bounds['Min']
        upper = bounds['Max']
        scaled[dimension] = (scaled[dimension] - lower) / (upper - lower)
    return scaled
84
+
85
+
86
  def calculate_selected_score(df, selected_columns):
87
  # selected_score = df[selected_columns].sum(axis=1)
88
  selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
 
100
  selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
101
  return selected_score.fillna(0.0)
102
 
103
def calculate_selected_score_i2v(df, selected_columns):
    """Score each row using only the selected I2V dimensions.

    The selected columns are split into quality dimensions (those listed in
    ``I2V_QUALITY_LIST``) and image-to-video dimensions (those listed in
    ``I2V_LIST``); selected columns in neither list are ignored. For each
    group the columns are summed per row and divided by the summed
    ``DIM_WEIGHT_I2V`` weights of that group.

    Args:
        df: Table holding one column per dimension name.
        selected_columns: Dimension names chosen by the user.

    Returns:
        A per-row Series. If exactly one group's score contains NaN, the
        other group's score is returned as-is. Otherwise the two scores are
        combined with ``I2V_QUALITY_WEIGHT`` and ``I2V_WEIGHT`` and any NaN
        in the result is replaced by 0.0.
    """
    quality_dims = [c for c in selected_columns if c in I2V_QUALITY_LIST]
    i2v_dims = [c for c in selected_columns if c in I2V_LIST]

    # A group with no selected dimension divides a zero sum by a zero weight
    # total, which is what makes its score NaN.
    quality_score = df[quality_dims].sum(axis=1) / sum(DIM_WEIGHT_I2V[c] for c in quality_dims)
    i2v_score = df[i2v_dims].sum(axis=1) / sum(DIM_WEIGHT_I2V[c] for c in i2v_dims)

    quality_has_nan = quality_score.isna().any()
    i2v_has_nan = i2v_score.isna().any()

    # Only one group is usable: report that group on its own.
    if quality_has_nan and not i2v_has_nan:
        return i2v_score
    if i2v_has_nan and not quality_has_nan:
        return quality_score

    # Both groups usable, or neither: weighted mean with NaN mapped to 0.0.
    combined = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
    return combined.fillna(0.0)
119
+
120
  def get_final_score(df, selected_columns):
121
  normalize_df = get_normalized_df(df)
122
  #final_score = normalize_df.drop('name', axis=1).sum(axis=1)
 
144
  df.insert(1, 'Selected Score', selected_score)
145
  return df
146
 
147
def _set_or_insert_column(df, position, column, values):
    """Overwrite ``column`` in ``df`` if it exists, else insert it at ``position``."""
    if column in df:
        df[column] = values
    else:
        df.insert(position, column, values)


def get_final_score_i2v(df, selected_columns):
    """Add the I2V summary score columns to ``df`` and return it.

    The table is normalized with ``get_normalized_i2v_df`` and every
    dimension column except 'Video-Text Camera Motion' is multiplied by its
    ``DIM_WEIGHT_I2V`` weight. From the weighted values this computes:

    * 'Quality Score' - weighted mean over ``I2V_QUALITY_LIST``
    * 'I2V Score'     - weighted mean over ``I2V_LIST``
    * 'Total Score'   - the two combined with ``I2V_QUALITY_WEIGHT`` and
      ``I2V_WEIGHT``
    * 'Selected Score' - the same scheme restricted to ``selected_columns``

    Args:
        df: Raw I2V results; must contain 'Model Name (clickable)',
            'Video-Text Camera Motion' and every dimension in the two lists.
        selected_columns: Dimension names chosen by the user.

    Returns:
        The same ``df`` object, modified in place. Existing score columns
        are overwritten; missing ones are inserted near the front.
    """
    normalize_df = get_normalized_i2v_df(df)
    weighted_dims = normalize_df.drop(
        ['Model Name (clickable)', 'Video-Text Camera Motion'], axis=1
    ).columns
    for name in weighted_dims:
        normalize_df[name] = normalize_df[name] * DIM_WEIGHT_I2V[name]

    quality_score = normalize_df[I2V_QUALITY_LIST].sum(axis=1) / sum(DIM_WEIGHT_I2V[i] for i in I2V_QUALITY_LIST)
    i2v_score = normalize_df[I2V_LIST].sum(axis=1) / sum(DIM_WEIGHT_I2V[i] for i in I2V_LIST)
    final_score = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)

    _set_or_insert_column(df, 1, 'Total Score', final_score)
    _set_or_insert_column(df, 2, 'I2V Score', i2v_score)
    _set_or_insert_column(df, 3, 'Quality Score', quality_score)

    # Use the I2V-specific scorer: the generic one filters by the non-I2V
    # dimension lists, not by I2V_QUALITY_LIST / I2V_LIST.
    selected_score = calculate_selected_score_i2v(normalize_df, selected_columns)
    # Inserted last at position 1, so it ends up directly after the name column.
    _set_or_insert_column(df, 1, 'Selected Score', selected_score)
    return df
173
+
174
+
175
 
176
  def get_final_score_quality(df, selected_columns):
177
  normalize_df = get_normalized_df(df)
 
192
  return df
193
 
194
  def get_baseline_df():
195
+ # submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
196
+ # submission_repo.git_pull()
197
  df = pd.read_csv(CSV_DIR)
198
  df = get_final_score(df, checkbox_group.value)
199
  df = df.sort_values(by="Selected Score", ascending=False)
 
203
  return df
204
 
205
  def get_baseline_df_quality():
206
+ # submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
207
+ # submission_repo.git_pull()
208
  df = pd.read_csv(QUALITY_DIR)
209
  df = get_final_score_quality(df, checkbox_group_quality.value)
210
  df = df.sort_values(by="Selected Score", ascending=False)
 
213
  df = convert_scores_to_percentage(df)
214
  return df
215
 
216
def get_baseline_df_i2v():
    """Build the I2V leaderboard table used as the Dataframe's initial value.

    Reads the CSV at ``I2V_DIR``, scores it against the dimensions currently
    held by ``checkbox_group_i2v``, orders rows by "Selected Score" (highest
    first), keeps the ``MODEL_INFO_TAB_I2V`` columns followed by the checked
    dimensions, and passes the result through
    ``convert_scores_to_percentage``.
    """
    # The submission repo is not pulled here; the CSV already on disk is read.
    # submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    # submission_repo.git_pull()
    scored = get_final_score_i2v(pd.read_csv(I2V_DIR), checkbox_group_i2v.value)
    ranked = scored.sort_values(by="Selected Score", ascending=False)
    visible_columns = MODEL_INFO_TAB_I2V + checkbox_group_i2v.value
    return convert_scores_to_percentage(ranked[visible_columns])
226
+
227
  def get_all_df(selected_columns, dir=CSV_DIR):
228
  submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
229
  submission_repo.git_pull()
 
240
  df = df.sort_values(by="Selected Score", ascending=False)
241
  return df
242
 
243
def get_all_df_i2v(selected_columns, dir=I2V_DIR):
    """Load the I2V results CSV, score it, and rank it.

    Args:
        selected_columns: Dimension names used for the "Selected Score".
        dir: Path of the CSV file to read (defaults to ``I2V_DIR``).

    Returns:
        The scored table with all columns, sorted by "Selected Score" in
        descending order.
    """
    # The submission repo is not pulled here; the CSV already on disk is read.
    # submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
    # submission_repo.git_pull()
    scored = get_final_score_i2v(pd.read_csv(dir), selected_columns)
    return scored.sort_values(by="Selected Score", ascending=False)
250
 
251
  def convert_scores_to_percentage(df):
252
  # 对DataFrame中的每一列(除了'name'列)进行操作
 
311
  )
312
  return filter_component#.value
313
 
314
def on_filter_model_size_method_change_i2v(selected_columns):
    """Rebuild the I2V leaderboard table after the dimension checkboxes change.

    Args:
        selected_columns: Dimension names currently checked in the I2V tab.

    Returns:
        A new ``gr.components.Dataframe`` showing the ``MODEL_INFO_TAB_I2V``
        columns followed by the checked dimensions (in ``I2V_TAB`` order),
        ranked by "Selected Score" and converted with
        ``convert_scores_to_percentage``.
    """
    updated_data = get_all_df_i2v(selected_columns, I2V_DIR)

    # Present dimensions in the canonical I2V_TAB order, not click order.
    selected_columns = [item for item in I2V_TAB if item in selected_columns]
    present_columns = MODEL_INFO_TAB_I2V + selected_columns

    updated_data = updated_data[present_columns]
    updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
    updated_data = convert_scores_to_percentage(updated_data)

    # Resolve datatypes against the I2V header/type tables, matching the
    # headers and datatypes the I2V tab's Dataframe is declared with.
    update_datatype = [I2V_TITILE_TYPE[COLUMN_NAMES_I2V.index(x)] for x in present_columns]

    filter_component = gr.components.Dataframe(
        value=updated_data,
        headers=present_columns,
        type="pandas",
        datatype=update_datatype,
        interactive=False,
        visible=True,
    )
    return filter_component
336
 
337
  block = gr.Blocks()
338
 
 
416
 
417
  checkbox_group_quality.change(fn=on_filter_model_size_method_change_quality, inputs=[checkbox_group_quality], outputs=data_component_quality)
418
 
419
+ with gr.TabItem("VBench-I2V", elem_id="vbench-tab-table", id=3):
420
+ with gr.Accordion("NOTE", open=False):
421
+ i2v_note_button = gr.Textbox(
422
+ value=I2V_CLAIM_TEXT,
423
+ label="",
424
+ elem_id="quality-button",
425
+ lines=3,
426
+ )
427
+ with gr.Row():
428
+ with gr.Column(scale=1.0):
429
+ # selection for column part:
430
+ checkbox_group_i2v = gr.CheckboxGroup(
431
+ choices=I2V_TAB,
432
+ value=I2V_TAB,
433
+ label="Evaluation Quality Dimension",
434
+ interactive=True,
435
+ )
436
+
437
+ data_component_i2v = gr.components.Dataframe(
438
+ value=get_baseline_df_i2v,
439
+ headers=COLUMN_NAMES_I2V,
440
+ type="pandas",
441
+ datatype=I2V_TITILE_TYPE,
442
+ interactive=False,
443
+ visible=True,
444
+ )
445
+
446
+ checkbox_group_i2v.change(fn=on_filter_model_size_method_change_i2v, inputs=[checkbox_group_i2v], outputs=data_component_i2v)
447
+
448
  # table 2
449
+ with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=4):
450
  gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
451
 
452
  # table 3
constants.py CHANGED
@@ -14,6 +14,14 @@ MODEL_INFO_TAB_QUALITY = [
14
  "Selected Score"
15
  ]
16
 
 
 
 
 
 
 
 
 
17
  TASK_INFO = [
18
  "subject consistency",
19
  "background consistency",
@@ -71,6 +79,32 @@ QUALITY_TAB = [
71
  "imaging quality",
72
  "dynamic degree",]
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  DIM_WEIGHT = {
75
  "subject consistency":1,
76
  "background consistency":1,
@@ -90,18 +124,35 @@ DIM_WEIGHT = {
90
  "overall consistency":1
91
  }
92
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  SEMANTIC_WEIGHT = 1
94
  QUALITY_WEIGHT = 4
 
 
95
 
96
  DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
 
97
 
98
  SUBMISSION_NAME = "vbench_leaderboard_submission"
99
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Vchitect/", SUBMISSION_NAME)
100
  CSV_DIR = "./vbench_leaderboard_submission/results.csv"
101
  QUALITY_DIR = "./vbench_leaderboard_submission/quality.csv"
 
102
 
103
  COLUMN_NAMES = MODEL_INFO + TASK_INFO
104
  COLUMN_NAMES_QUALITY = MODEL_INFO_TAB_QUALITY + QUALITY_TAB
 
105
 
106
  LEADERBORAD_INTRODUCTION = """# VBench Leaderboard
107
 
@@ -145,6 +196,8 @@ CITATION_BUTTON_TEXT = r"""@inproceedings{huang2023vbench,
145
 
146
  QUALITY_CLAIM_TEXT = "We use all the videos on Sora website (https://openai.com/sora) for a preliminary evaluation, including the failure case videos Sora provided."
147
 
 
 
148
  NORMALIZE_DIC = {
149
  "subject consistency": {"Min": 0.1462, "Max": 1.0},
150
  "background consistency": {"Min": 0.2615, "Max": 1.0},
@@ -162,4 +215,16 @@ NORMALIZE_DIC = {
162
  "appearance style": {"Min": 0.0009, "Max": 0.2855},
163
  "temporal style": {"Min": 0.0, "Max": 0.364},
164
  "overall consistency": {"Min": 0.0, "Max": 0.364}
 
 
 
 
 
 
 
 
 
 
 
 
165
  }
 
14
  "Selected Score"
15
  ]
16
 
17
# Leading (non-dimension) columns of the I2V leaderboard table: the model
# name followed by the four summary scores.
MODEL_INFO_TAB_I2V = [
    "Model Name (clickable)",
    "Total Score",
    "I2V Score",
    "Quality Score",
    "Selected Score",
]
24
+
25
  TASK_INFO = [
26
  "subject consistency",
27
  "background consistency",
 
79
  "imaging quality",
80
  "dynamic degree",]
81
 
82
# Dimensions that make up the "I2V Score" group.
I2V_LIST = [
    "Video-Image Subject Consistency",
    "Video-Image Background Consistency",
]

# Dimensions that make up the "Quality Score" group.
I2V_QUALITY_LIST = [
    "Subject Consistency",
    "Background Consistency",
    "Motion Smoothness",
    "Dynamic Degree",
    "Aesthetic Quality",
    "Imaging Quality",
]

# Every selectable I2V dimension, in display order: camera motion first,
# then the I2V group, then the quality group.
I2V_TAB = ["Video-Text Camera Motion", *I2V_LIST, *I2V_QUALITY_LIST]
107
+
108
  DIM_WEIGHT = {
109
  "subject consistency":1,
110
  "background consistency":1,
 
124
  "overall consistency":1
125
  }
126
 
127
# Per-dimension weight applied to normalized I2V scores, keyed by the
# dimension's column name.
DIM_WEIGHT_I2V = {
    "Video-Text Camera Motion": 0.1,
    # I2V group
    "Video-Image Subject Consistency": 1,
    "Video-Image Background Consistency": 1,
    # Quality group
    "Subject Consistency": 1,
    "Background Consistency": 1,
    "Motion Smoothness": 1,
    "Dynamic Degree": 0.5,
    "Aesthetic Quality": 1,
    "Imaging Quality": 1,
}
138
+
139
  SEMANTIC_WEIGHT = 1
140
  QUALITY_WEIGHT = 4
141
+ I2V_WEIGHT = 1.0
142
+ I2V_QUALITY_WEIGHT = 1.0
143
 
144
  DATA_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
145
+ I2V_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
146
 
147
  SUBMISSION_NAME = "vbench_leaderboard_submission"
148
  SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Vchitect/", SUBMISSION_NAME)
149
  CSV_DIR = "./vbench_leaderboard_submission/results.csv"
150
  QUALITY_DIR = "./vbench_leaderboard_submission/quality.csv"
151
+ I2V_DIR = "./vbench_leaderboard_submission/i2v_results.csv"
152
 
153
  COLUMN_NAMES = MODEL_INFO + TASK_INFO
154
  COLUMN_NAMES_QUALITY = MODEL_INFO_TAB_QUALITY + QUALITY_TAB
155
+ COLUMN_NAMES_I2V = MODEL_INFO_TAB_I2V + I2V_TAB
156
 
157
  LEADERBORAD_INTRODUCTION = """# VBench Leaderboard
158
 
 
196
 
197
  QUALITY_CLAIM_TEXT = "We use all the videos on Sora website (https://openai.com/sora) for a preliminary evaluation, including the failure case videos Sora provided."
198
 
199
+ I2V_CLAIM_TEXT = "Since the open-sourced SVD models do not accept text input during the I2V stage, we are unable to evaluate its `camera motion` in terms of `video-text consistency`. The total score is calculated based on all dimensions except `camera motion`."
200
+
201
  NORMALIZE_DIC = {
202
  "subject consistency": {"Min": 0.1462, "Max": 1.0},
203
  "background consistency": {"Min": 0.2615, "Max": 1.0},
 
215
  "appearance style": {"Min": 0.0009, "Max": 0.2855},
216
  "temporal style": {"Min": 0.0, "Max": 0.364},
217
  "overall consistency": {"Min": 0.0, "Max": 0.364}
218
+ }
219
+
220
# Min/Max bounds used to min-max normalize each I2V dimension, keyed by the
# dimension's column name. Rows are (name, Min, Max).
NORMALIZE_DIC_I2V = {
    name: {"Min": low, "Max": high}
    for name, low, high in (
        ("Video-Text Camera Motion", 0.0, 1.0),
        ("Video-Image Subject Consistency", 0.1462, 1.0),
        ("Video-Image Background Consistency", 0.2615, 1.0),
        ("Subject Consistency", 0.1462, 1.0),
        ("Background Consistency", 0.2615, 1.0),
        ("Motion Smoothness", 0.7060, 0.9975),
        ("Dynamic Degree", 0.0, 1.0),
        ("Aesthetic Quality", 0.0, 1.0),
        ("Imaging Quality", 0.0, 1.0),
    )
}