cccjc committed on
Commit
2b8200c
·
1 Parent(s): 0396eb4
constants.py CHANGED
@@ -261,6 +261,9 @@ MODEL_URLS = {
261
  "InternVL3_14B": "https://huggingface.co/OpenGVLab/InternVL3-14B",
262
  "InternVL3_38B": "https://huggingface.co/OpenGVLab/InternVL3-38B",
263
  "InternVL3_78B": "https://huggingface.co/OpenGVLab/InternVL3-78B",
 
 
 
264
  }
265
 
266
  # Define the base MODEL_GROUPS structure
 
261
  "InternVL3_14B": "https://huggingface.co/OpenGVLab/InternVL3-14B",
262
  "InternVL3_38B": "https://huggingface.co/OpenGVLab/InternVL3-38B",
263
  "InternVL3_78B": "https://huggingface.co/OpenGVLab/InternVL3-78B",
264
+ "GPT-o1": "https://openai.com/o1/",
265
+ "GPT-o1-mini": "https://openai.com/index/openai-o1-mini-advancing-cost-efficient-reasoning/",
266
+ "Seed1.5-VL": "https://github.com/ByteDance-Seed/Seed1.5-VL",
267
  }
268
 
269
  # Define the base MODEL_GROUPS structure
static/eval_results/Default/self_reported.json CHANGED
@@ -1,8 +1,25 @@
1
  {
2
- "MiniMax-VL-01": 47.4,
3
- "Qwen2.5-VL-72B": 51.3,
4
- "Qwen2.5-VL-7B": 36.8,
5
- "Qwen2.5-VL-3B": 28.9,
6
- "GPT-o1": 58.0,
7
- "GPT-o1-mini": 54.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  }
 
1
  {
2
+ "MiniMax-VL-01": {
3
+ "overall": 47.4
4
+ },
5
+ "Qwen2.5-VL-72B": {
6
+ "overall": 51.3
7
+ },
8
+ "Qwen2.5-VL-7B": {
9
+ "overall": 36.8
10
+ },
11
+ "Qwen2.5-VL-3B": {
12
+ "overall": 28.9
13
+ },
14
+ "GPT-o1": {
15
+ "overall": 58.0
16
+ },
17
+ "GPT-o1-mini": {
18
+ "overall": 54.2
19
+ },
20
+ "Seed1.5-VL": {
21
+ "overall": 59.85,
22
+ "core": 58.58,
23
+ "open": 68.46
24
+ }
25
  }
utils.py CHANGED
@@ -106,11 +106,20 @@ class MEGABenchEvalDataLoader:
106
 
107
  # Add asterisk for self-reported results
108
  if model in self.SELF_REPORTED:
 
109
  # Store numeric value for sorting but display with asterisk
110
- row["Overall"] = self.SELF_REPORTED[model]
111
- row["Overall_display"] = f"{self.SELF_REPORTED[model]:.2f}*"
112
- row["Core"] = None
113
- row["Open-ended"] = None
 
 
 
 
 
 
 
 
114
  for display_name in self.SUPER_GROUPS[selected_super_group]:
115
  row[display_name] = None
116
  else:
 
106
 
107
  # Add asterisk for self-reported results
108
  if model in self.SELF_REPORTED:
109
+ model_scores = self.SELF_REPORTED[model]
110
  # Store numeric value for sorting but display with asterisk
111
+ row["Overall"] = model_scores["overall"]
112
+ row["Overall_display"] = f"{model_scores['overall']:.2f}*"
113
+ # Handle optional core and open scores
114
+ core_score = model_scores.get("core")
115
+ open_score = model_scores.get("open")
116
+ row["Core"] = core_score
117
+ row["Open-ended"] = open_score
118
+ # Add asterisk to core and open scores if they exist
119
+ if core_score is not None:
120
+ row["Core"] = f"{core_score:.2f}*"
121
+ if open_score is not None:
122
+ row["Open-ended"] = f"{open_score:.2f}*"
123
  for display_name in self.SUPER_GROUPS[selected_super_group]:
124
  row[display_name] = None
125
  else: