MINGYISU committed
Commit a79c335 · 1 Parent(s): dc50cf5

updated info

Files changed (3)
  1. datasets.py +35 -0
  2. utils.py +8 -3
  3. utils_v2.py +10 -37
datasets.py ADDED
@@ -0,0 +1,35 @@
+ def sum_lol(lol):
+     assert isinstance(lol, list) and all(isinstance(i, list) for i in lol), f"Input should be a list of lists, got {type(lol)}"
+     total = []
+     for sublist in lol:
+         total.extend(sublist)
+     return total
+
+ SCORE_BASE_DIR = "scores"
+ META_DATA = ["model_name", "model_size", "url"]
+ DATASETS = {
+     "image": {
+         "I-CLS": ['VOC2007', 'N24News', 'SUN397', 'ObjectNet', 'Country211', 'Place365', 'ImageNet-1K', 'HatefulMemes', 'ImageNet-A', 'ImageNet-R'],
+         "I-QA": ['OK-VQA', 'A-OKVQA', 'DocVQA', 'InfographicsVQA', 'ChartQA', 'Visual7W', 'ScienceQA', 'GQA', 'TextVQA', 'VizWiz'],
+         "I-RET": ['VisDial', 'CIRR', 'VisualNews_t2i', 'VisualNews_i2t', 'MSCOCO_t2i', 'MSCOCO_i2t', 'NIGHTS', 'WebQA', 'FashionIQ', 'Wiki-SS-NQ', 'OVEN', 'EDIS'],
+         "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W-Pointing']
+     },
+     "visdoc": {
+         "ViDoRe-V1": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry'],
+         "ViDoRe-V2": ["ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2_multilingual"],  # "ViDoRe_biomedical_lectures_v2", "ViDoRe_economics_reports_v2", "ViDoRe_esg_reports_v2"
+         "VisRAG": ['VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA'],
+         "VisDoc-OOD": ['ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
+     },
+     "video": {
+         "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
+         "V-QA": ['Video-MME', 'MVBench', 'NExTQA', 'EgoSchema', 'ActivityNetQA'],
+         "V-RET": ['MSR-VTT', 'MSVD', 'DiDeMo', 'VATEX', 'YouCook2'],
+         "V-MRET": ['QVHighlight', 'Charades-STA', 'MomentSeeker']
+     }
+ }
+ ALL_DATASETS_SPLITS = {k: sum_lol(list(v.values())) for k, v in DATASETS.items()}
+ ALL_DATASETS = sum_lol(list(ALL_DATASETS_SPLITS.values()))
+ MODALITIES = list(DATASETS.keys())
+ SPECIAL_METRICS = {
+     '__default__': 'hit@1',
+ }
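As a quick orientation for the new module, here is a minimal sketch of how its exports compose. The counts below are derived from the `DATASETS` dict in the diff, and the sketch assumes it is run from the repo root, where this local `datasets.py` shadows the PyPI `datasets` package on `sys.path`:

```python
from datasets import DATASETS, ALL_DATASETS_SPLITS, ALL_DATASETS, MODALITIES, sum_lol

# sum_lol flattens exactly one nesting level: a list of lists -> one list.
assert sum_lol([[1, 2], [3]]) == [1, 2, 3]

# ALL_DATASETS_SPLITS collapses each modality's task splits into one flat list.
print(MODALITIES)                          # ['image', 'visdoc', 'video']
print(len(ALL_DATASETS_SPLITS["image"]))   # 36 (10 I-CLS + 10 I-QA + 12 I-RET + 4 I-VG)
print(len(ALL_DATASETS_SPLITS["visdoc"]))  # 24
print(len(ALL_DATASETS_SPLITS["video"]))   # 19
print(len(ALL_DATASETS))                   # 79 dataset names in total
```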
utils.py CHANGED
@@ -6,8 +6,11 @@ import os
  import requests
  import io
  import shutil
+ import pprint as pp
  from huggingface_hub import Repository

+ from datasets import DATASETS
+
  HF_TOKEN = os.environ.get("HF_TOKEN")

  BASE_COLS = ["Rank", "Models", "Model Size(B)", "Data Source"]
@@ -33,7 +36,7 @@ Building upon **MMEB-V1**, **MMEB-V2** expands the evaluation scope to include
  This comprehensive suite enables robust evaluation of multimodal embedding models across static, temporal, and structured visual data settings.

  | [**📈Overview**](https://tiger-ai-lab.github.io/VLM2Vec/) | [**Github**](https://github.com/TIGER-AI-Lab/VLM2Vec)
- | [**📖MMEB-V2/VLM2Vec-V2 Paper (TBA)**](https://arxiv.org/abs/2410.05160)
+ | [**📖MMEB-V2/VLM2Vec-V2 Paper**](https://arxiv.org/abs/2507.04590)
  | [**📖MMEB-V1/VLM2Vec-V1 Paper**](https://arxiv.org/abs/2410.05160)
  | [**🤗Hugging Face**](https://huggingface.co/datasets/TIGER-Lab/MMEB-V2)
  | [**Discord**](https://discord.gg/njyKubdtry) |
@@ -42,8 +45,10 @@ This comprehensive suite enables robust evaluation of multimodal embedding model
  TABLE_INTRODUCTION = """***Important Notes:***
  This is the MMEB-V1 leaderboard, which is now deprecated. MMEB-V1 is now the Image section of MMEB-V2, and the results on this leaderboard have been integrated into the MMEB-V2 Image tab. For researchers relying on MMEB-V1, we recommend transitioning to MMEB-V2 for more comprehensive evaluation metrics and support. Thank you for your collaboration and understanding! \n"""

- LEADERBOARD_INFO = """
- ## Dataset Summary
+ LEADERBOARD_INFO = f"""
+ ## Dataset Overview
+ This is the dictionary of all datasets used in our code. Please make sure all datasets' scores are included in your submission. \n
+ {pp.pformat(DATASETS)}
  """

  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
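Since the new `LEADERBOARD_INFO` is an f-string, `pp.pformat(DATASETS)` is evaluated once when utils.py is imported, baking the pretty-printed dict into the leaderboard text. A toy sketch of the pattern (the two-entry dict here is a stand-in, not the real `DATASETS`):

```python
import pprint as pp

# Stand-in for the real DATASETS dict that utils.py imports from datasets.py.
DATASETS = {"image": {"I-CLS": ["VOC2007", "N24News"]}}

# Same pattern as the diff: the f-string is formatted at import time,
# so the rendered leaderboard text is fixed once per process.
LEADERBOARD_INFO = f"""
## Dataset Overview
This is the dictionary of all datasets used in our code. Please make sure all datasets' scores are included in your submission.

{pp.pformat(DATASETS)}
"""

print(LEADERBOARD_INFO)
```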
utils_v2.py CHANGED
@@ -2,42 +2,7 @@ import json
  import os
  import pandas as pd
  from utils import create_hyperlinked_names, process_model_size
-
- def sum_lol(lol):
-     assert isinstance(lol, list) and all(isinstance(i, list) for i in lol), f"Input should be a list of lists, got {type(lol)}"
-     total = []
-     for sublist in lol:
-         total.extend(sublist)
-     return total
-
- SCORE_BASE_DIR = "scores"
- META_DATA = ["model_name", "model_size", "url"]
- DATASETS = {
-     "image": {
-         "I-CLS": ['VOC2007', 'N24News', 'SUN397', 'ObjectNet', 'Country211', 'Place365', 'ImageNet-1K', 'HatefulMemes', 'ImageNet-A', 'ImageNet-R'],
-         "I-QA": ['OK-VQA', 'A-OKVQA', 'DocVQA', 'InfographicsVQA', 'ChartQA', 'Visual7W', 'ScienceQA', 'GQA', 'TextVQA', 'VizWiz'],
-         "I-RET": ['VisDial', 'CIRR', 'VisualNews_t2i', 'VisualNews_i2t', 'MSCOCO_t2i', 'MSCOCO_i2t', 'NIGHTS', 'WebQA', 'FashionIQ', 'Wiki-SS-NQ', 'OVEN', 'EDIS'],
-         "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W-Pointing']
-     },
-     "visdoc": {
-         "ViDoRe-V1": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry'],
-         "ViDoRe-V2": ["ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2_multilingual"],  # "ViDoRe_biomedical_lectures_v2", "ViDoRe_economics_reports_v2", "ViDoRe_esg_reports_v2"
-         "VisRAG": ['VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA'],
-         "VisDoc-OOD": ['ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
-     },
-     "video": {
-         "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
-         "V-QA": ['Video-MME', 'MVBench', 'NExTQA', 'EgoSchema', 'ActivityNetQA'],
-         "V-RET": ['MSR-VTT', 'MSVD', 'DiDeMo', 'VATEX', 'YouCook2'],
-         "V-MRET": ['QVHighlight', 'Charades-STA', 'MomentSeeker']
-     }
- }
- ALL_DATASETS_SPLITS = {k: sum_lol(list(v.values())) for k, v in DATASETS.items()}
- ALL_DATASETS = sum_lol(list(ALL_DATASETS_SPLITS.values()))
- MODALITIES = list(DATASETS.keys())
- SPECIAL_METRICS = {
-     '__default__': 'hit@1',
- }
+ from datasets import *

  BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
  BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
@@ -77,7 +42,15 @@ LEADERBOARD_INFO = """
  ## Dataset Summary
  """

- CITATION_BUTTON_TEXT = r"""TBA"""
+ CITATION_BUTTON_TEXT = r"""@misc{meng2025vlm2vecv2advancingmultimodalembedding,
+       title={VLM2Vec-V2: Advancing Multimodal Embedding for Videos, Images, and Visual Documents},
+       author={Rui Meng and Ziyan Jiang and Ye Liu and Mingyi Su and Xinyi Yang and Yuepeng Fu and Can Qin and Zeyuan Chen and Ran Xu and Caiming Xiong and Yingbo Zhou and Wenhu Chen and Semih Yavuz},
+       year={2025},
+       eprint={2507.04590},
+       archivePrefix={arXiv},
+       primaryClass={cs.CV},
+       url={https://arxiv.org/abs/2507.04590},
+ }"""

  def load_single_json(file_path):
      with open(file_path, 'r') as file:
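Because datasets.py defines no `__all__`, the `from datasets import *` in this diff binds every top-level name that does not start with an underscore. A sketch of the explicit equivalent, plus the metric-lookup pattern that `SPECIAL_METRICS` supports (the `"VOC2007"` lookup is illustrative, not from the commit):

```python
# Explicit equivalent of `from datasets import *` as used in utils_v2.py.
from datasets import (
    sum_lol, SCORE_BASE_DIR, META_DATA, DATASETS,
    ALL_DATASETS_SPLITS, ALL_DATASETS, MODALITIES, SPECIAL_METRICS,
)

# Per-dataset metric lookup falls back to the default entry.
metric = SPECIAL_METRICS.get("VOC2007", SPECIAL_METRICS["__default__"])
print(metric)  # 'hit@1'
```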