add visdoc sub task scores (#52)
- integrate v1 scores into v2 (a84b2867d9a4a0e33270363b0714046fd615e200)
- fixed issue (05eb353153751b5e47a54ab85a60184faf506c9a)
- fixed (8cbf5e6fbd748f32853ce8bf32a6ce431b94d3c6)
- updated visdoc sub tasks (8e6921ff462e6933b5cd0407b677881e761ff0d9)
- add vd sub task scores (037b103fe2c30de21eefd960e38901f1bc47d588)
- fix issues (a2d53876e17e381396fe0a5303e84841027ab539)
- app.py +1 -1
- utils.py +0 -1
- utils_v2.py +12 -9
app.py
CHANGED
@@ -134,7 +134,7 @@ with gr.Blocks() as block:
     with gr.TabItem("π Visual Doc", elem_id="qa-tab-table1", id=4):
         gr.Markdown(v2.TABLE_INTRODUCTION_D)
         data_component5 = gr.components.Dataframe(
-            value=v2.rank_models(df2[v2.COLUMN_NAMES_D], '
+            value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'Visdoc-Overall'),
             headers=v2.COLUMN_NAMES_D,
             type="pandas",
             datatype=v2.DATA_TITLE_TYPE_D,
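For context on this one-line change: 'Visdoc-Overall' is the new aggregate column defined in utils_v2.py below, so the Visual Doc tab now sorts by the visdoc overall score. A minimal sketch of what a rank_models-style helper typically does, assuming it sorts descending on the given column and fills a 1-based Rank column; the actual implementation lives in utils_v2.py and may differ:

```python
import pandas as pd

def rank_models(df: pd.DataFrame, rank_by: str) -> pd.DataFrame:
    # Sort descending on the chosen score column, then renumber Rank 1..N.
    # Hypothetical sketch, not the repo's exact implementation.
    ranked = df.sort_values(by=rank_by, ascending=False).reset_index(drop=True)
    ranked['Rank'] = range(1, len(ranked) + 1)
    return ranked
```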
utils.py
CHANGED
@@ -103,7 +103,6 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
     }
 }
 ```
-Note: We still accept the old format until 2025-06-30.
 Please refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
 To submit, create a pull request and upload the generated JSON file to the ***scores*** folder, then send us an email at [email protected], including your model's information. \n We will review your submission and update the leaderboard accordingly. \n
 Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
utils_v2.py
CHANGED
@@ -20,7 +20,10 @@ DATASETS = {
         "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
     },
     "visdoc": {
-        "
+        "ViDoRe-V1": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry'],
+        "ViDoRe-V2": ["ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_economics_reports_v2", "ViDoRe_esg_reports_v2"],  # Following Abandoned: "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2_multilingual"
+        "VisRAG": ['VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA'],
+        "VisDoc-OOD": ['ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
     },
     "video": {
         "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
@@ -37,29 +40,29 @@ SPECIAL_METRICS = {
 }
 
 BASE_COLS = ['Rank', 'Models', 'Model Size(B)']
-TASKS = ["Overall", "I-CLS", "I-QA", "I-RET", "I-VG", "VisDoc", "V-CLS", "V-QA", "V-RET", "V-MRET"]
 BASE_DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown']
 
-COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', '
+COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'Visdoc-Overall']
 DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
     ['number'] * 3
 
-SUB_TASKS_I =
+SUB_TASKS_I = ["I-CLS", "I-QA", "I-RET", "I-VG"]
 TASKS_I = ['Image-Overall'] + SUB_TASKS_I + ALL_DATASETS_SPLITS['image']
 COLUMN_NAMES_I = BASE_COLS + TASKS_I
 DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
-    ['number'] *
+    ['number'] * len(TASKS_I + SUB_TASKS_I)
 
-SUB_TASKS_V =
+SUB_TASKS_V = ["V-CLS", "V-QA", "V-RET", "V-MRET"]
 TASKS_V = ['Video-Overall'] + SUB_TASKS_V + ALL_DATASETS_SPLITS['video']
 COLUMN_NAMES_V = BASE_COLS + TASKS_V
 DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
-    ['number'] *
+    ['number'] * len(TASKS_V + SUB_TASKS_V)
 
-
+SUB_TASKS_D = ['ViDoRe-V1', 'ViDoRe-V2', 'VisRAG', 'VisDoc-OOD']
+TASKS_D = ['Visdoc-Overall'] + SUB_TASKS_D + ALL_DATASETS_SPLITS['visdoc']
 COLUMN_NAMES_D = BASE_COLS + TASKS_D
 DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
-    ['number'] * len(TASKS_D)
+    ['number'] * len(TASKS_D + SUB_TASKS_D)
 
 TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
 Models are ranked based on **Overall**"""
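To make the new sub-task columns concrete: each key under DATASETS['visdoc'] names one sub-task (ViDoRe-V1, ViDoRe-V2, VisRAG, VisDoc-OOD) and lists its member datasets, and TASKS_D prepends the 'Visdoc-Overall' aggregate. A hedged sketch of how per-sub-task scores could be derived from per-dataset columns; mean-over-datasets and mean-over-sub-tasks are assumptions here, not necessarily this repo's exact aggregation:

```python
import pandas as pd

# Abbreviated copy of DATASETS['visdoc']; the full lists live in utils_v2.py.
VISDOC_SUBTASKS = {
    'ViDoRe-V1': ['ViDoRe_arxivqa', 'ViDoRe_docvqa'],
    'VisRAG': ['VisRAG_ArxivQA', 'VisRAG_ChartQA'],
}

def add_visdoc_scores(df: pd.DataFrame) -> pd.DataFrame:
    # One column per sub-task: mean over its member-dataset columns (assumed).
    out = df.copy()
    for subtask, datasets in VISDOC_SUBTASKS.items():
        out[subtask] = out[datasets].mean(axis=1)
    # Aggregate used for ranking the Visual Doc tab: mean over sub-tasks (assumed).
    out['Visdoc-Overall'] = out[list(VISDOC_SUBTASKS)].mean(axis=1)
    return out
```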