Spaces:

TIGER-Lab
/

MMEB-Leaderboard

Running

App Files Community

MINGYISU committed on Jun 15

Commit

037b103

2 Parent(s): 9cfc538 8e6921f

add vd sub task scores

Browse files

Files changed (3) hide show

app.py +1 -1
utils.py +0 -1
utils_v2.py +30 -9

app.py CHANGED Viewed

@@ -134,7 +134,7 @@ with gr.Blocks() as block:
         with gr.TabItem("📑 Visual Doc", elem_id="qa-tab-table1", id=4):
             gr.Markdown(v2.TABLE_INTRODUCTION_D)
             data_component5 = gr.components.Dataframe(
-                value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'VisDoc'),
                 headers=v2.COLUMN_NAMES_D,
                 type="pandas",
                 datatype=v2.DATA_TITLE_TYPE_D,

         with gr.TabItem("📑 Visual Doc", elem_id="qa-tab-table1", id=4):
             gr.Markdown(v2.TABLE_INTRODUCTION_D)
             data_component5 = gr.components.Dataframe(
+                value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'Visdoc-Overall'),
                 headers=v2.COLUMN_NAMES_D,
                 type="pandas",
                 datatype=v2.DATA_TITLE_TYPE_D,

utils.py CHANGED Viewed

@@ -103,7 +103,6 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
     }
 }
 ```
-Note: We still accept the old format until 2025-06-30.
 Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
 To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
 Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!

     }
 }
 ```
 Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
 To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
 Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!

utils_v2.py CHANGED Viewed

@@ -20,7 +20,10 @@ DATASETS = {
         "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
         },
     "visdoc": {
-        "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc', "ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2", "ViDoRe_esg_reports_v2_multilingual"]
         },
     "video": {
         "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
@@ -37,29 +40,29 @@ SPECIAL_METRICS = {
 }
 BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
-TASKS = ["Overall", "I-CLS", "I-QA", "I-RET", "I-VG", "VisDoc", "V-CLS", "V-QA", "V-RET", "V-MRET"]
 BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
-COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'VisDoc']
 DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
                     ['number'] * 3
-SUB_TASKS_I = TASKS[1:5]
 TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
 COLUMN_NAMES_I = BASE_COLS + TASKS_I
 DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
-                    ['number'] * (len(TASKS_I) + 4)
-SUB_TASKS_V = TASKS[6:10]
 TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
 COLUMN_NAMES_V = BASE_COLS + TASKS_V
 DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
-                    ['number'] * (len(TASKS_V) + 4)
-TASKS_D = ['VisDoc'] + ALL_DATASETS_SPLITS['visdoc']
 COLUMN_NAMES_D = BASE_COLS + TASKS_D
 DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
-                    ['number'] * len(TASKS_D)
 TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
                         Models are ranked based on **Overall**"""
@@ -155,6 +158,24 @@ def rank_models(df, column='Overall', rank_name='Rank'):
     df[rank_name] = range(1, len(df) + 1)
     return df
 def get_df():
     """Generates a DataFrame from the loaded data."""
     all_data = load_data()

         "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
         },
     "visdoc": {
+        "ViDoRe-V1": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry'],
+        "ViDoRe-V2": ["ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_economics_reports_v2", "ViDoRe_esg_reports_v2"],  # Following Abandoned: "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2_multilingual"
+        "VisRAG": ['VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA'],
+        "VisDoc-OOD": ['ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
         },
     "video": {
         "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
 }
 BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
 BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
+COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'Visdoc-Overall']
 DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
                     ['number'] * 3
+SUB_TASKS_I = ["I-CLS", "I-QA", "I-RET", "I-VG"]
 TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
 COLUMN_NAMES_I = BASE_COLS + TASKS_I
 DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
+                    ['number'] * len(TASKS_I + SUB_TASKS_I)
+SUB_TASKS_V = ["V-CLS", "V-QA", "V-RET", "V-MRET"]
 TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
 COLUMN_NAMES_V = BASE_COLS + TASKS_V
 DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
+                    ['number'] * len(TASKS_V + SUB_TASKS_V)
+SUB_TASKS_D = ['ViDoRe-V1', 'ViDoRe-V2', 'VisRAG', 'VisDoc-OOD']
+TASKS_D = ['Visdoc-Overall'] + SUB_TASKS_D + ALL_DATASETS_SPLITS['visdoc']
 COLUMN_NAMES_D = BASE_COLS + TASKS_D
 DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
+                    ['number'] * len(TASKS_D + SUB_TASKS_D)
 TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
                         Models are ranked based on **Overall**"""
     df[rank_name] = range(1, len(df) + 1)
     return df
+def add_color_to_column(df)
+    def render_color(text, color):
+        """Renders the text in a specific color for Markdown."""
+        return f"<span style='color:{color};'>{text}</span>"
+    df = df.copy()
+    SUB_TASKS = SUB_TASKS_I + SUB_TASKS_V + SUB_TASKS_D
+    MOD_OVERALL = ['Image-Overall', 'Video-Overall', 'Visdoc-Overall']
+    assert all(col in df.columns for col in ["Overall"] + MOD_OVERALL + SUB_TASKS), f"Missing columns in DataFrame: {SUB_TASKS}"
+    renamed_columns = {'Overall': render_color('Overall', 'red')}
+    for col in MOD_OVERALL:
+        renamed_columns[col] = render_color(col, 'yellow')
+    for col in SUB_TASKS:
+        renamed_columns[col] = render_color(col, 'blue')
+    df.rename(columns=renamed_columns)
+    return df
 def get_df():
     """Generates a DataFrame from the loaded data."""
     all_data = load_data()