MINGYISU committed on
Commit
01ad525
Β·
verified Β·
1 Parent(s): 9cfc538
Files changed (3) hide show
  1. app.py +1 -1
  2. utils.py +0 -1
  3. utils_v2.py +12 -9
app.py CHANGED
@@ -134,7 +134,7 @@ with gr.Blocks() as block:
134
  with gr.TabItem("πŸ“‘ Visual Doc", elem_id="qa-tab-table1", id=4):
135
  gr.Markdown(v2.TABLE_INTRODUCTION_D)
136
  data_component5 = gr.components.Dataframe(
137
- value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'VisDoc'),
138
  headers=v2.COLUMN_NAMES_D,
139
  type="pandas",
140
  datatype=v2.DATA_TITLE_TYPE_D,
 
134
  with gr.TabItem("πŸ“‘ Visual Doc", elem_id="qa-tab-table1", id=4):
135
  gr.Markdown(v2.TABLE_INTRODUCTION_D)
136
  data_component5 = gr.components.Dataframe(
137
+ value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'Visdoc-Overall'),
138
  headers=v2.COLUMN_NAMES_D,
139
  type="pandas",
140
  datatype=v2.DATA_TITLE_TYPE_D,
utils.py CHANGED
@@ -103,7 +103,6 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
103
  }
104
  }
105
  ```
106
- Note: We still accept the old format until 2025-06-30.
107
  Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
108
  To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
109
  Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
 
103
  }
104
  }
105
  ```
 
106
  Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
107
  To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
108
  Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
utils_v2.py CHANGED
@@ -20,7 +20,10 @@ DATASETS = {
20
  "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
21
  },
22
  "visdoc": {
23
- "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc', "ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2", "ViDoRe_esg_reports_v2_multilingual"]
 
 
 
24
  },
25
  "video": {
26
  "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
@@ -37,29 +40,29 @@ SPECIAL_METRICS = {
37
  }
38
 
39
  BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
40
- TASKS = ["Overall", "I-CLS", "I-QA", "I-RET", "I-VG", "VisDoc", "V-CLS", "V-QA", "V-RET", "V-MRET"]
41
  BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
42
 
43
- COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'VisDoc']
44
  DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
45
  ['number'] * 3
46
 
47
- SUB_TASKS_I = TASKS[1:5]
48
  TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
49
  COLUMN_NAMES_I = BASE_COLS + TASKS_I
50
  DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
51
- ['number'] * (len(TASKS_I) + 4)
52
 
53
- SUB_TASKS_V = TASKS[6:10]
54
  TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
55
  COLUMN_NAMES_V = BASE_COLS + TASKS_V
56
  DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
57
- ['number'] * (len(TASKS_V) + 4)
58
 
59
- TASKS_D = ['VisDoc'] + ALL_DATASETS_SPLITS['visdoc']
 
60
  COLUMN_NAMES_D = BASE_COLS + TASKS_D
61
  DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
62
- ['number'] * len(TASKS_D)
63
 
64
  TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
65
  Models are ranked based on **Overall**"""
 
20
  "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
21
  },
22
  "visdoc": {
23
+ "ViDoRe-V1": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry'],
24
+ "ViDoRe-V2": ["ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_economics_reports_v2", "ViDoRe_esg_reports_v2"], # Following Abandoned: "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2_multilingual"
25
+ "VisRAG": ['VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA'],
26
+ "VisDoc-OOD": ['ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
27
  },
28
  "video": {
29
  "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
 
40
  }
41
 
42
  BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
 
43
  BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
44
 
45
+ COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'Visdoc-Overall']
46
  DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
47
  ['number'] * 3
48
 
49
+ SUB_TASKS_I = ["I-CLS", "I-QA", "I-RET", "I-VG"]
50
  TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
51
  COLUMN_NAMES_I = BASE_COLS + TASKS_I
52
  DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
53
+ ['number'] * len(TASKS_I + SUB_TASKS_I)
54
 
55
+ SUB_TASKS_V = ["V-CLS", "V-QA", "V-RET", "V-MRET"]
56
  TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
57
  COLUMN_NAMES_V = BASE_COLS + TASKS_V
58
  DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
59
+ ['number'] * len(TASKS_V + SUB_TASKS_V)
60
 
61
+ SUB_TASKS_D = ['ViDoRe-V1', 'ViDoRe-V2', 'VisRAG', 'VisDoc-OOD']
62
+ TASKS_D = ['Visdoc-Overall'] + SUB_TASKS_D + ALL_DATASETS_SPLITS['visdoc']
63
  COLUMN_NAMES_D = BASE_COLS + TASKS_D
64
  DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
65
+ ['number'] * len(TASKS_D + SUB_TASKS_D)
66
 
67
  TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
68
  Models are ranked based on **Overall**"""