cccjc committed
Commit 2c937b6 · 1 Parent(s): e5b03d3
Files changed (1)
  1. constants.py +3 -1
constants.py CHANGED
@@ -32,7 +32,7 @@ We aim to provide cost-effective and accurate evaluation for multimodal models,
 ## 📊🔍 Results & Takeaways from Evaluating Top Models
 
 
-### ️‍🔥📝 2025.01
+### 📝 2025.01
 
 - **Gemini 2.0 Experimental (1206)** and **Gemini 2.0 Flash Experimental** outperform **GPT-4o** and **Claude 3.5 Sonnet**.
 - We add **Grok-2-vision-1212** to the single-image leaderboard. The model seems to use a lot of tokens per image, and cannot run many of our multi-image and video tasks.
@@ -279,6 +279,7 @@ BASE_MODEL_GROUPS = {
         "Qwen2.5-VL-72B",
         "Gemma_3_27B_IT",
         "Gemini_2.5_pro_0325",
+        "InternVL3_38B",
         "InternVL3_78B",
     ],
     "Efficiency Models": [
@@ -338,6 +339,7 @@ BASE_MODEL_GROUPS = {
         "MiniMax-VL-01",
         "Qwen2.5-VL-72B",
         "Gemma_3_27B_IT",
+        "InternVL3_38B",
         "InternVL3_78B",
     ],
     "Open-source Efficiency Models": [