MINGYISU committed on
Commit baae497 · verified · 1 Parent(s): de61b89

add v1 scores back to v2 image tab (#51)


- integrate v1 scores into v2 (a84b2867d9a4a0e33270363b0714046fd615e200)
- fixed issue (05eb353153751b5e47a54ab85a60184faf506c9a)
- fixed (8cbf5e6fbd748f32853ce8bf32a6ce431b94d3c6)

Files changed (3)
  1. app.py +17 -6
  2. utils.py +3 -19
  3. utils_v2.py +6 -4
app.py CHANGED
@@ -11,12 +11,9 @@ def update_table(query, min_size, max_size, selected_tasks=None):
         filtered_df = filtered_df[selected_columns]
     return filtered_df
 
-def update_table_v2(query, min_size, max_size, selected_tasks=None):
+def update_table_v2(query, min_size, max_size):
     df = v2.get_df()
     filtered_df = v2.search_and_filter_models(df, query, min_size, max_size)
-    if selected_tasks and len(selected_tasks) > 0:
-        selected_columns = v2.BASE_COLS + selected_tasks
-        filtered_df = filtered_df[selected_columns]
     return filtered_df
 
 with gr.Blocks() as block:
@@ -42,6 +39,7 @@ with gr.Blocks() as block:
         elem_id="search-bar"
     )
 
+    df = get_df()
     df2 = v2.get_df()
     min_size2, max_size2 = get_size_range(df2)
 
@@ -92,11 +90,25 @@ with gr.Blocks() as block:
         )
         refresh_button2.click(fn=v2.refresh_data, outputs=data_component2)
 
+
+    def get_special_processed_df2():
+        """Temporary special processing to merge v1 scores with v2 image scores.
+        Will be removed later after v2 is fully adopted."""
+        df2_i = df2[v2.COLUMN_NAMES_I]
+        df1 = df.rename(columns={'V1-Overall': 'Image-Overall'})
+        df1 = df1[v2.BASE_COLS + v2.SUB_TASKS_I + ['Image-Overall']]
+        combined_df = pd.concat([df1, df2_i], ignore_index=True)
+        for task in v2.TASKS_I:
+            combined_df[task] = combined_df[task].apply(lambda score: '-' if pd.isna(score) else score)
+        combined_df = v2.rank_models(combined_df, 'Image-Overall')
+        return combined_df[v2.COLUMN_NAMES_I]
+
     # table 2, image scores only
     with gr.TabItem("🖼️ Image", elem_id="qa-tab-table1", id=2):
         gr.Markdown(v2.TABLE_INTRODUCTION_I)
+        df2_i = get_special_processed_df2()
         data_component3 = gr.components.Dataframe(
-            value=v2.rank_models(df2[v2.COLUMN_NAMES_I], 'Image-Overall'),
+            value=df2_i,
             headers=v2.COLUMN_NAMES_I,
             type="pandas",
             datatype=v2.DATA_TITLE_TYPE_I,
@@ -160,7 +172,6 @@ with gr.Blocks() as block:
         elem_id="search-bar"
     )
 
-    df = get_df()
     min_size, max_size = get_size_range(df)
 
     with gr.Row():
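For context, the merge performed by the new `get_special_processed_df2` follows a standard pandas rename → concat → rank → mask pattern. Below is a minimal standalone sketch of that pattern; the toy frames and column subsets are illustrative, not the real leaderboard schema:

```python
import pandas as pd

# Toy stand-ins for the v1 table and the v2 image table (illustrative columns).
df_v1 = pd.DataFrame({'Models': ['a'], 'V1-Overall': [61.2], 'I-CLS': [58.0]})
df_v2 = pd.DataFrame({'Models': ['b'], 'Image-Overall': [64.5], 'I-CLS': [60.1], 'I-QA': [55.3]})

# Rename the v1 overall column so both tables share one score column.
df_v1 = df_v1.rename(columns={'V1-Overall': 'Image-Overall'})

# Stack the tables; v1 rows get NaN for tasks they never reported (here, I-QA).
combined = pd.concat([df_v1, df_v2], ignore_index=True)

# Rank by the shared overall score (assumed present for every row).
combined = combined.sort_values(by='Image-Overall', ascending=False).reset_index(drop=True)
combined['Rank'] = range(1, len(combined) + 1)

# Render missing per-task scores as '-' for display, as the diff above does.
for col in ['I-CLS', 'I-QA']:
    combined[col] = combined[col].apply(lambda s: '-' if pd.isna(s) else s)

print(combined)
```

Note that the real code replaces NaN before calling `rank_models`; that only works if the sort column (`Image-Overall`) has no gaps, otherwise the mixed `str`/`float` column would break the sort.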
utils.py CHANGED
@@ -38,8 +38,8 @@ This comprehensive suite enables robust evaluation of multimodal embedding model
 | [**🤗Hugging Face**](https://huggingface.co/datasets/TIGER-Lab/MMEB-V2) |
 """
 
-TABLE_INTRODUCTION = """***Important Notes:*** \n
-**We will be depreciating the MMEB-V1 leaderboard soon, and we will be releasing MMEB-V2 with more detailed scores and automatic evaluation.** \n"""
+TABLE_INTRODUCTION = """***Important Notes:*** \n
+This is the MMEB-V1 leaderboard, which is now deprecated. MMEB-V1 is now the Image section of MMEB-V2, and its results have been integrated into the MMEB-V2 Image tab. For researchers relying on MMEB-V1, we recommend transitioning to MMEB-V2 for more comprehensive evaluation metrics and support. Thank you for your collaboration and understanding! \n"""
 
 LEADERBOARD_INFO = """
 ## Dataset Summary
@@ -103,23 +103,7 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
     }
 }
 ```
-
-### **TO SUBMIT V1 ONLY (Depreciated, but we still accept this format until 2025-06-30)**
-```json
-[
-    {
-        "Model": "<Model Name>",
-        "URL": "<Model URL>" or null,
-        "Model Size(B)": 1000 or null,
-        "Data Source": "Self-Reported",
-        "V1-Overall": 50.0,
-        "I-CLS": 50.0,
-        "I-QA": 50.0,
-        "I-RET": 50.0,
-        "I-VG": 50.0
-    },
-]
-```
+Note: We will continue to accept the old (V1-only) format until 2025-06-30; after that, only the new format will be supported, so please make sure your submission follows the new format. \n
 Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
 To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
 Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
utils_v2.py CHANGED
@@ -44,12 +44,14 @@ COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'VisDoc
 DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
     ['number'] * 3
 
-TASKS_I = ['Image-Overall'] + TASKS[1:5] + ALL_DATASETS_SPLITS['image']
+SUB_TASKS_I = TASKS[1:5]
+TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
 COLUMN_NAMES_I = BASE_COLS + TASKS_I
 DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
     ['number'] * (len(TASKS_I) + 4)
 
-TASKS_V = ['Video-Overall'] + TASKS[6:10] + ALL_DATASETS_SPLITS['video']
+SUB_TASKS_V = TASKS[6:10]
+TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
 COLUMN_NAMES_V = BASE_COLS + TASKS_V
 DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
     ['number'] * (len(TASKS_V) + 4)
@@ -147,10 +149,10 @@ def generate_model_row(data):
     row.update(scores)
     return row
 
-def rank_models(df, column='Overall'):
+def rank_models(df, column='Overall', rank_name='Rank'):
     """Ranks the models based on the specific score."""
     df = df.sort_values(by=column, ascending=False).reset_index(drop=True)
-    df['Rank'] = range(1, len(df) + 1)
+    df[rank_name] = range(1, len(df) + 1)
     return df
 
 def get_df():
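The new `rank_name` parameter generalizes `rank_models` so callers can write the rank into a column other than `Rank`. A minimal usage sketch (the scores and the `'Image-Rank'` column name below are made up for illustration):

```python
import pandas as pd

def rank_models(df, column='Overall', rank_name='Rank'):
    """Sorts models by the given score column and assigns 1-based ranks."""
    df = df.sort_values(by=column, ascending=False).reset_index(drop=True)
    df[rank_name] = range(1, len(df) + 1)
    return df

# Made-up scores, just to exercise the new parameter.
scores = pd.DataFrame({'Models': ['a', 'b'], 'Image-Overall': [61.2, 64.5]})
print(rank_models(scores, column='Image-Overall', rank_name='Image-Rank'))
```

The default arguments keep every existing call site (which passes only `df` and a column name) working unchanged.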