MINGYISU committed (verified)
Commit 64dcfc0 · 1 Parent(s): 08f8498
app.py CHANGED
@@ -23,7 +23,7 @@ with gr.Blocks() as block:
     gr.Markdown(LEADERBOARD_INTRODUCTION)
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
-        # Table 1
+        # Table 1, the main leaderboard of overall scores
         with gr.TabItem("📊 MMEB (V2)", elem_id="qa-tab-table1", id=1):
             with gr.Row():
                 with gr.Accordion("Citation", open=False):
@@ -92,10 +92,11 @@ with gr.Blocks() as block:
             )
             refresh_button2.click(fn=v2.refresh_data, outputs=data_component2)
 
-        # table 2
+        # table 2, image scores only
         with gr.TabItem("🖼️ Image", elem_id="qa-tab-table1", id=2):
+            gr.Markdown(v2.TABLE_INTRODUCTION_I)
             data_component3 = gr.components.Dataframe(
-                value=df2[v2.COLUMN_NAMES_I],
+                value=v2.rank_models(df2[v2.COLUMN_NAMES_I], 'Image-Overall'),
                 headers=v2.COLUMN_NAMES_I,
                 type="pandas",
                 datatype=v2.DATA_TITLE_TYPE_I,
@@ -104,10 +105,11 @@ with gr.Blocks() as block:
                 max_height=2400,
             )
 
-        # table 3
+        # table 3, video scores only
         with gr.TabItem("💽 Video", elem_id="qa-tab-table1", id=3):
+            gr.Markdown(v2.TABLE_INTRODUCTION_V)
             data_component4 = gr.components.Dataframe(
-                value=df2[v2.COLUMN_NAMES_V],
+                value=v2.rank_models(df2[v2.COLUMN_NAMES_V], 'Video-Overall'),
                 headers=v2.COLUMN_NAMES_V,
                 type="pandas",
                 datatype=v2.DATA_TITLE_TYPE_V,
@@ -116,10 +118,11 @@ with gr.Blocks() as block:
                 max_height=2400,
             )
 
-        # table 4
+        # table 4, visual document scores only
        with gr.TabItem("📑 Visual Doc", elem_id="qa-tab-table1", id=4):
+            gr.Markdown(v2.TABLE_INTRODUCTION_D)
             data_component5 = gr.components.Dataframe(
-                value=df2[v2.COLUMN_NAMES_D],
+                value=v2.rank_models(df2[v2.COLUMN_NAMES_D], 'VisDoc'),
                 headers=v2.COLUMN_NAMES_D,
                 type="pandas",
                 datatype=v2.DATA_TITLE_TYPE_D,
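Review note: each modality tab now re-sorts its own slice of the leaderboard instead of inheriting the global ordering. Below is a minimal sketch (toy data, not from the commit; the real app passes `df2[v2.COLUMN_NAMES_I]` etc.) of how the new `rank_models` call behaves per tab:

```python
import pandas as pd

def rank_models(df, column='Overall'):
    """Sort by the given score column and assign 1-based ranks (mirrors utils_v2.rank_models)."""
    df = df.sort_values(by=column, ascending=False).reset_index(drop=True)
    df['Rank'] = range(1, len(df) + 1)
    return df

# Toy stand-in for df2[v2.COLUMN_NAMES_I]
toy = pd.DataFrame({'Models': ['A', 'B', 'C'],
                    'Image-Overall': [61.2, 74.5, 68.9]})
print(rank_models(toy, 'Image-Overall'))
# 'B' ranks first within the Image tab even if its global 'Overall' rank differs.
```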
scores/LamRA-Ret-Qwen2.5VL-7b.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "LamRA-Ret-Qwen2.5VL-7b",
-        "report_generated_date": "2025-06-09T07:00:24.383583"
+        "report_generated_date": "2025-06-09T07:00:24.383583",
+        "model_size": 8.29,
+        "url": "https://huggingface.co/code-kunkun/LamRA-Ret-Qwen2.5VL-7b",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
scores/LamRA-Ret.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "LamRA-Ret",
-        "report_generated_date": "2025-06-09T07:03:51.413144"
+        "report_generated_date": "2025-06-09T07:03:51.413144",
+        "model_size": 8.29,
+        "url": "https://huggingface.co/code-kunkun/LamRA-Ret",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
scores/VLM2Vec-V1-Qwen2VL-2B.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "VLM2Vec-V1-Qwen2VL-2B",
-        "report_generated_date": "2025-06-09T07:08:50.537181"
+        "report_generated_date": "2025-06-09T07:08:50.537181",
+        "model_size": 2.21,
+        "url": "https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-2B",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
scores/VLM2Vec-V1-Qwen2VL-7B.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "VLM2Vec-V1-Qwen2VL-7B",
-        "report_generated_date": "2025-06-08T08:08:07.905654"
+        "report_generated_date": "2025-06-08T08:08:07.905654",
+        "model_size": 8.29,
+        "url": "https://huggingface.co/TIGER-Lab/VLM2Vec-Qwen2VL-7B",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
scores/VLM2Vec-V2.0-Qwen2VL-2B.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "VLM2Vec-V2.0-Qwen2VL-2B",
-        "report_generated_date": "2025-06-09T07:05:59.773788"
+        "report_generated_date": "2025-06-09T07:05:59.773788",
+        "model_size": 2.21,
+        "url": "https://huggingface.co/VLM2Vec/VLM2Vec-V2.0",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
scores/colpali-v1.3.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "colpali-v1.3",
-        "report_generated_date": "2025-06-09T07:08:13.841120"
+        "report_generated_date": "2025-06-09T07:08:13.841120",
+        "model_size": 2.92,
+        "url": "https://huggingface.co/vidore/colpali-v1.3",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
scores/gme-Qwen2-VL-2B-Instruct.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "gme-Qwen2-VL-2B-Instruct",
-        "report_generated_date": "2025-06-09T07:04:30.518891"
+        "report_generated_date": "2025-06-09T07:04:30.518891",
+        "model_size": 2.21,
+        "url": "https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-2B-Instruct",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
scores/gme-Qwen2-VL-7B-Instruct.json CHANGED
@@ -1,7 +1,10 @@
 {
     "metadata": {
         "model_name": "gme-Qwen2-VL-7B-Instruct",
-        "report_generated_date": "2025-06-09T07:05:25.508931"
+        "report_generated_date": "2025-06-09T07:05:25.508931",
+        "model_size": 8.29,
+        "url": "https://huggingface.co/Alibaba-NLP/gme-Qwen2-VL-7B-Instruct",
+        "data_source": "TIGER-Lab"
     },
     "metrics": {
         "image": {
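All eight score files gain the same three metadata fields (`model_size`, `url`, `data_source`) that `generate_model_row` now reads. A small sketch for spot-checking that every file carries the full set (not part of the commit; it assumes the repo layout with a `scores/` directory):

```python
import glob
import json

REQUIRED = ("model_name", "report_generated_date", "model_size", "url", "data_source")

for path in sorted(glob.glob("scores/*.json")):
    with open(path) as f:
        metadata = json.load(f)["metadata"]
    missing = [key for key in REQUIRED if key not in metadata]
    if missing:
        print(f"{path}: missing {missing}")
```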
utils.py CHANGED
@@ -57,26 +57,9 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
 
 ## ⚠ Please note that you need to submit the JSON file with the following format:
 
-### **TO SUBMIT V1 ONLY (Deprecating)**
-```json
-[
-    {
-        "Model": "<Model Name>",
-        "URL": "<Model URL>" or null,
-        "Model Size(B)": 1000 or null,
-        "Data Source": "Self-Reported",
-        "V1-Overall": 50.0,
-        "I-CLS": 50.0,
-        "I-QA": 50.0,
-        "I-RET": 50.0,
-        "I-VG": 50.0
-    },
-]
-```
-
-### ***Important Notes: We will be releasing MMEB-V2 soon!***
-### ***In V2, the detailed scores of each dataset will be included, and our code will automatically generate the results and calculate the overall scores.***
-### **A V2 Submission would look like this: (TO BE RELEASED SOON)**
+### ***Important Notes: We have released MMEB-V2 and will deprecate MMEB-V1 soon. All further submissions should be made using the V2 format (see below).***
+### ***In V2, the detailed scores of each dataset are included, and our code automatically generates the results and calculates the overall scores. See the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for more information.***
+### **A V2 submission looks like this:**
 ```json
 {
     "metadata": {
@@ -84,8 +67,6 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
         "URL": "<Model URL>" or null,
         "Model Size(B)": 1000 or null,
         "Data Source": "Self-Reported",
-        "V1-Overall": 50.0,
-        "V2-Overall": 50.0
     },
     "metrics": {
         "image": {
@@ -121,7 +102,24 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
     }
 }
 ```
-You may refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for instructions about evaluating your model. \n
+
+### **TO SUBMIT V1 ONLY (Deprecated, but we still accept this format until 2025-06-30)**
+```json
+[
+    {
+        "Model": "<Model Name>",
+        "URL": "<Model URL>" or null,
+        "Model Size(B)": 1000 or null,
+        "Data Source": "Self-Reported",
+        "V1-Overall": 50.0,
+        "I-CLS": 50.0,
+        "I-QA": 50.0,
+        "I-RET": 50.0,
+        "I-VG": 50.0
+    },
+]
+```
+You may refer to the [**GitHub page**](https://github.com/TIGER-AI-Lab/VLM2Vec) for detailed instructions about evaluating your model. \n
 Please send us an email at [email protected], attaching the JSON file. We will review your submission and update the leaderboard accordingly. \n
 Please also share any feedback or suggestions you have for improving the leaderboard experience. We appreciate your contributions to the MMEB community!
 """
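Since both formats are accepted until 2025-06-30, a submission script may receive either shape. A hypothetical pre-flight check (not part of this repo; key names follow the templates above) that tells the two apart before the file is emailed:

```python
import json

def detect_submission_format(path):
    """Return 'v2' for the dict-based format, 'v1' for the legacy list format."""
    with open(path) as f:
        submission = json.load(f)
    # V2: a dict with "metadata" and per-dataset "metrics"
    if isinstance(submission, dict) and "metadata" in submission and "metrics" in submission:
        return "v2"
    # V1: a list of flat records carrying the self-reported V1-Overall score
    if isinstance(submission, list) and submission and "V1-Overall" in submission[0]:
        return "v1"
    raise ValueError(f"{path} matches neither the V1 nor the V2 template")
```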
utils_v2.py CHANGED
@@ -1,14 +1,13 @@
 import json
 import os
 import pandas as pd
-from utils import create_hyperlinked_names
-
-def sum_lst(lst):
-    assert isinstance(lst, list) and lst, f"Input should be a non-empty list, got {type(lst)}, size {len(lst)}"
-    total = lst[0]
-    for item in lst[1:]:
-        assert isinstance(item, (list, int, float)), f"Expected types are list and numbers, got {type(item)}"
-        total += item
+from utils import create_hyperlinked_names, process_model_size
+
+def sum_lol(lol):
+    assert isinstance(lol, list) and all(isinstance(i, list) for i in lol), f"Input should be a list of lists, got {type(lol)}"
+    total = []
+    for sublist in lol:
+        total.extend(sublist)
     return total
 
 SCORE_BASE_DIR = "scores"
@@ -21,7 +20,7 @@ DATASETS = {
         "I-VG": ['MSCOCO', 'RefCOCO', 'RefCOCO-Matching', 'Visual7W']
     },
     "visdoc": {
-        "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc']
+        "VisDoc": ['ViDoRe_arxivqa', 'ViDoRe_docvqa', 'ViDoRe_infovqa', 'ViDoRe_tabfquad', 'ViDoRe_tatdqa', 'ViDoRe_shiftproject', 'ViDoRe_syntheticDocQA_artificial_intelligence', 'ViDoRe_syntheticDocQA_energy', 'ViDoRe_syntheticDocQA_government_reports', 'ViDoRe_syntheticDocQA_healthcare_industry', 'VisRAG_ArxivQA', 'VisRAG_ChartQA', 'VisRAG_MP-DocVQA', 'VisRAG_SlideVQA', 'VisRAG_InfoVQA', 'VisRAG_PlotQA', 'ViDoSeek-page', 'ViDoSeek-doc', 'MMLongBench-page', 'MMLongBench-doc', "ViDoRe_esg_reports_human_labeled_v2", "ViDoRe_biomedical_lectures_v2", "ViDoRe_biomedical_lectures_v2_multilingual", "ViDoRe_economics_reports_v2", "ViDoRe_economics_reports_v2_multilingual", "ViDoRe_esg_reports_v2", "ViDoRe_esg_reports_v2_multilingual"]
     },
     "video": {
         "V-CLS": ['K700', 'UCF101', 'HMDB51', 'SmthSmthV2', 'Breakfast'],
@@ -30,8 +29,8 @@ DATASETS = {
         "V-MRET": ['QVHighlight', 'Charades-STA', 'MomentSeeker', 'ActivityNetQA']
     }
 }
-ALL_DATASETS_SPLITS = {k: sum_lst(list(v.values())) for k, v in DATASETS.items()}
-ALL_DATASETS = sum_lst(list(ALL_DATASETS_SPLITS.values()))
+ALL_DATASETS_SPLITS = {k: sum_lol(list(v.values())) for k, v in DATASETS.items()}
+ALL_DATASETS = sum_lol(list(ALL_DATASETS_SPLITS.values()))
 MODALITIES = list(DATASETS.keys())
 SPECIAL_METRICS = {
     '__default__': 'hit@1',
@@ -45,24 +44,29 @@ COLUMN_NAMES = BASE_COLS + ["Overall", 'Image-Overall', 'Video-Overall', 'VisDoc']
 DATA_TITLE_TYPE = BASE_DATA_TITLE_TYPE + \
     ['number'] * 3
 
-TASKS_I = ['Image-Overall'] + ALL_DATASETS_SPLITS['image']
+TASKS_I = ['Image-Overall'] + TASKS[1:5] + ALL_DATASETS_SPLITS['image']
 COLUMN_NAMES_I = BASE_COLS + TASKS_I
 DATA_TITLE_TYPE_I = BASE_DATA_TITLE_TYPE + \
-    ['number'] * len(TASKS_I)
+    ['number'] * (len(TASKS_I) + 4)
 
-TASKS_V = ['Video-Overall'] + ALL_DATASETS_SPLITS['video']
+TASKS_V = ['Video-Overall'] + TASKS[6:10] + ALL_DATASETS_SPLITS['video']
 COLUMN_NAMES_V = BASE_COLS + TASKS_V
 DATA_TITLE_TYPE_V = BASE_DATA_TITLE_TYPE + \
-    ['number'] * len(TASKS_V)
+    ['number'] * (len(TASKS_V) + 4)
 
 TASKS_D = ['VisDoc'] + ALL_DATASETS_SPLITS['visdoc']
 COLUMN_NAMES_D = BASE_COLS + TASKS_D
 DATA_TITLE_TYPE_D = BASE_DATA_TITLE_TYPE + \
     ['number'] * len(TASKS_D)
 
-TABLE_INTRODUCTION = """**I-CLS**: Image Classification, **I-QA**: (Image) Visual Question Answering, **I-RET**: Image Retrieval, **I-VG**: (Image) Visual Grounding \n
-**V-CLS**: Video Classification, **V-QA**: (Video) Visual Question Answering, **V-RET**: Video Retrieval, **V-MRET**: Video Moment Retrieval \n
-**VisDoc**: Visual Document Understanding \n"""
+TABLE_INTRODUCTION = """**MMEB**: Massive MultiModal Embedding Benchmark \n
+Models are ranked based on **Overall**"""
+TABLE_INTRODUCTION_I = """**I-CLS**: Image Classification, **I-QA**: (Image) Visual Question Answering, **I-RET**: Image Retrieval, **I-VG**: (Image) Visual Grounding \n
+Models are ranked based on **Image-Overall**"""
+TABLE_INTRODUCTION_V = """**V-CLS**: Video Classification, **V-QA**: (Video) Visual Question Answering, **V-RET**: Video Retrieval, **V-MRET**: Video Moment Retrieval \n
+Models are ranked based on **Video-Overall**"""
+TABLE_INTRODUCTION_D = """**VisDoc**: Visual Document Understanding \n
+Models are ranked based on **VisDoc**"""
 
 LEADERBOARD_INFO = """
 ## Dataset Summary
@@ -112,16 +116,16 @@ def calculate_score(raw_scores=None):
     avg_scores = {}
 
     # Calculate overall score for all datasets
-    avg_scores['Overall'] = None # get_avg(sum(all_scores.values()), len(ALL_DATASETS))
+    avg_scores['Overall'] = get_avg(sum(all_scores.values()), len(ALL_DATASETS))
 
     # Calculate scores for each modality
     for modality in MODALITIES:
-        datasets_for_each_modality = ALL_DATASETS_SPLITS.get(modality, [])
+        datasets_for_each_modality = ALL_DATASETS_SPLITS[modality]
         avg_scores[f"{modality.capitalize()}-Overall"] = get_avg(
             sum(all_scores.get(dataset, 0.0) for dataset in datasets_for_each_modality),
             len(datasets_for_each_modality)
        )
-
+
     # Calculate scores for each sub-task
     for modality, datasets_list in DATASETS.items():
         for sub_task, datasets in datasets_list.items():
@@ -136,20 +140,27 @@ def generate_model_row(data):
     row = {
         'Models': metadata.get('model_name', None),
         'Model Size(B)': metadata.get('model_size', None),
-        'URL': metadata.get('url', None)
+        'URL': metadata.get('url', None),
+        'Data Source': metadata.get('data_source', 'Self-Reported'),
     }
     scores = calculate_score(data['metrics'])
     row.update(scores)
     return row
 
+def rank_models(df, column='Overall'):
+    """Ranks the models based on the specified score column."""
+    df = df.sort_values(by=column, ascending=False).reset_index(drop=True)
+    df['Rank'] = range(1, len(df) + 1)
+    return df
+
 def get_df():
     """Generates a DataFrame from the loaded data."""
     all_data = load_data()
     rows = [generate_model_row(data) for data in all_data]
     df = pd.DataFrame(rows)
-    df = df.sort_values(by='Overall', ascending=False).reset_index(drop=True)
-    df['Rank'] = range(1, len(df) + 1)
+    df['Model Size(B)'] = df['Model Size(B)'].apply(process_model_size)
     df = create_hyperlinked_names(df)
+    df = rank_models(df, column='Overall')
     return df
 
 def refresh_data():
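The rename makes the helper's intent explicit: the old `sum_lst` relied on `list + list` concatenation, while `sum_lol` simply flattens a list of lists. A quick illustrative check (not in the commit; the dataset names are arbitrary examples), plus a note on the re-enabled headline score:

```python
def sum_lol(lol):
    """Flatten a list of lists, as defined in utils_v2.py."""
    assert isinstance(lol, list) and all(isinstance(i, list) for i in lol)
    total = []
    for sublist in lol:
        total.extend(sublist)
    return total

print(sum_lol([['K700', 'UCF101'], ['MSVD-QA'], ['DiDeMo']]))
# ['K700', 'UCF101', 'MSVD-QA', 'DiDeMo']

# With 'Overall' re-enabled as sum(all_scores.values()) / len(ALL_DATASETS),
# every dataset in ALL_DATASETS counts in the denominator, so a model that
# skips datasets is effectively scored 0 on them in the headline number.
```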