MINGYISU committed on
Commit
037b103
·
2 Parent(s): 9cfc538 8e6921f

add vd sub task scores

Browse files
Files changed (3) hide show
  1. app.py +1 -1
  2. utils.py +0 -1
  3. utils_v2.py +30 -9
app.py CHANGED
@@ -134,7 +134,7 @@ with gr.Blocks() as block:
134
  with gr.TabItem("📑 Visual Doc", elem_id="qa-tab-table1", id=4):
135
  gr.Markdown(v2.TABLE_INTRODUCTION_D)
136
  data_component5 = gr.components.Dataframe(
137
- value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'VisDoc'),
138
  headers=v2.COLUMN_NAMES_D,
139
  type="pandas",
140
  datatype=v2.DATA_TITLE_TYPE_D,
 
134
  with gr.TabItem("📑 Visual Doc", elem_id="qa-tab-table1", id=4):
135
  gr.Markdown(v2.TABLE_INTRODUCTION_D)
136
  data_component5 = gr.components.Dataframe(
137
+ value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'Visdoc-Overall'),
138
  headers=v2.COLUMN_NAMES_D,
139
  type="pandas",
140
  datatype=v2.DATA_TITLE_TYPE_D,
utils.py CHANGED
@@ -103,7 +103,6 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
103
  }
104
  }
105
  ```
106
- Note: We still accept the old format until 2025-06-30.
107
  Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
108
  To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
109
  Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
 
103
  }
104
  }
105
  ```
 
106
  Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
107
  To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
108
  Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
utils_v2.py CHANGED
@@ -20,7 +20,10 @@ DATASETS = {
20
  "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
21
  },
22
  "visdoc": {
23
- "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc', "ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2", "ViDoRe_esg_reports_v2_multilingual"]
 
 
 
24
  },
25
  "video": {
26
  "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
@@ -37,29 +40,29 @@ SPECIAL_METRICS = {
37
  }
38
 
39
  BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
40
- TASKS = ["Overall", "I-CLS", "I-QA", "I-RET", "I-VG", "VisDoc", "V-CLS", "V-QA", "V-RET", "V-MRET"]
41
  BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
42
 
43
- COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'VisDoc']
44
  DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
45
  ['number'] * 3
46
 
47
- SUB_TASKS_I = TASKS[1:5]
48
  TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
49
  COLUMN_NAMES_I = BASE_COLS + TASKS_I
50
  DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
51
- ['number'] * (len(TASKS_I) + 4)
52
 
53
- SUB_TASKS_V = TASKS[6:10]
54
  TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
55
  COLUMN_NAMES_V = BASE_COLS + TASKS_V
56
  DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
57
- ['number'] * (len(TASKS_V) + 4)
58
 
59
- TASKS_D = ['VisDoc'] + ALL_DATASETS_SPLITS['visdoc']
 
60
  COLUMN_NAMES_D = BASE_COLS + TASKS_D
61
  DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
62
- ['number'] * len(TASKS_D)
63
 
64
  TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
65
  Models are ranked based on **Overall**"""
@@ -155,6 +158,24 @@ def rank_models(df, column='Overall', rank_name='Rank'):
155
  df[rank_name] = range(1, len(df) + 1)
156
  return df
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  def get_df():
159
  """Generates a DataFrame from the loaded data."""
160
  all_data = load_data()
 
20
  "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
21
  },
22
  "visdoc": {
23
+ "ViDoRe-V1": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry'],
24
+ "ViDoRe-V2": ["ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_economics_reports_v2", "ViDoRe_esg_reports_v2"], # Following Abandoned: "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2_multilingual"
25
+ "VisRAG": ['VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA'],
26
+ "VisDoc-OOD": ['ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
27
  },
28
  "video": {
29
  "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
 
40
  }
41
 
42
  BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
 
43
  BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
44
 
45
+ COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'Visdoc-Overall']
46
  DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
47
  ['number'] * 3
48
 
49
+ SUB_TASKS_I = ["I-CLS", "I-QA", "I-RET", "I-VG"]
50
  TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
51
  COLUMN_NAMES_I = BASE_COLS + TASKS_I
52
  DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
53
+ ['number'] * len(TASKS_I + SUB_TASKS_I)
54
 
55
+ SUB_TASKS_V = ["V-CLS", "V-QA", "V-RET", "V-MRET"]
56
  TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
57
  COLUMN_NAMES_V = BASE_COLS + TASKS_V
58
  DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
59
+ ['number'] * len(TASKS_V + SUB_TASKS_V)
60
 
61
+ SUB_TASKS_D = ['ViDoRe-V1', 'ViDoRe-V2', 'VisRAG', 'VisDoc-OOD']
62
+ TASKS_D = ['Visdoc-Overall'] + SUB_TASKS_D + ALL_DATASETS_SPLITS['visdoc']
63
  COLUMN_NAMES_D = BASE_COLS + TASKS_D
64
  DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
65
+ ['number'] * len(TASKS_D + SUB_TASKS_D)
66
 
67
  TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
68
  Models are ranked based on **Overall**"""
 
158
  df[rank_name] = range(1, len(df) + 1)
159
  return df
160
 
161
def add_color_to_column(df):
    """Return a copy of ``df`` whose score-column headers are wrapped in colored HTML.

    Each affected header is replaced by a ``<span style='color:...;'>`` string:

    * ``Overall`` is rendered in red,
    * ``Image-Overall``, ``Video-Overall`` and ``Visdoc-Overall`` in yellow,
    * every name in the module-level ``SUB_TASKS_I``, ``SUB_TASKS_V`` and
      ``SUB_TASKS_D`` lists in blue.

    All other columns keep their names. The input frame is not modified.

    Args:
        df: DataFrame that must contain every column listed above.

    Returns:
        A new DataFrame with the same data and the renamed headers.

    Raises:
        AssertionError: If any of the expected columns is absent; the message
            lists the columns that are missing.
    """
    def render_color(text, color):
        """Renders the text in a specific color for Markdown."""
        return f"<span style='color:{color};'>{text}</span>"

    sub_tasks = SUB_TASKS_I + SUB_TASKS_V + SUB_TASKS_D
    mod_overall = ['Image-Overall', 'Video-Overall', 'Visdoc-Overall']

    # Report exactly which columns are absent instead of echoing the whole
    # sub-task list, so a failure is actionable.
    missing = [col for col in ["Overall"] + mod_overall + sub_tasks
               if col not in df.columns]
    assert not missing, f"Missing columns in DataFrame: {missing}"

    renamed_columns = {'Overall': render_color('Overall', 'red')}
    renamed_columns.update({col: render_color(col, 'yellow') for col in mod_overall})
    renamed_columns.update({col: render_color(col, 'blue') for col in sub_tasks})

    # DataFrame.rename is not in-place by default: it returns a new frame.
    # The result must be returned, otherwise the renaming is silently lost.
    return df.rename(columns=renamed_columns)
178
+
179
  def get_df():
180
  """Generates a DataFrame from the loaded data."""
181
  all_data = load_data()