sergiopaniego HF Staff committed on
Commit
c301de7
·
verified ·
1 Parent(s): 62b998d
Files changed (2) hide show
  1. app.py +20 -5
  2. data_utils.py +1 -1
app.py CHANGED
@@ -34,10 +34,12 @@ def display_model_details(model_name):
34
  link = f"https://huggingface.co/{model_name}"
35
 
36
  return f"""
37
- <div style="margin-top: 10px; font-size: 15px;">
38
- <p><strong>Provider:</strong> {provider}</p>
39
- <p><strong>Size:</strong> {size}B</p>
40
- <p><strong>Link:</strong> <a href="{link}" target="_blank">{link}</a></p>
 
 
41
  </div>
42
  """
43
 
@@ -49,7 +51,20 @@ default_example_id = evaluation_data[0]["id"]
49
 
50
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
51
  gr.Markdown("# VLMVibeEval")
52
- gr.Markdown("VLM evaluation leaderboard based on vibes.")
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
54
 
55
  with gr.Column(visible=True) as model_mode:
 
34
  link = f"https://huggingface.co/{model_name}"
35
 
36
  return f"""
37
+ <div style="margin-top: 10px; font-size: 14px; display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">
38
+ <span><strong>Provider:</strong> {provider}</span>
39
+ <span style="color: #999;">|</span>
40
+ <span><strong>Size:</strong> {size}B</span>
41
+ <span style="color: #999;">|</span>
42
+ <span><strong>Link:</strong> <a href="{link}" target="_blank">{model_name}</a></span>
43
  </div>
44
  """
45
 
 
51
 
52
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
53
  gr.Markdown("# VLMVibeEval")
54
+ gr.Markdown(
55
+ """
56
+ A lightweight leaderboard for evaluating Vision Language Models (VLMs) — based on vibes.
57
+
58
+ Traditional benchmarks can be misleading due to overlap with training data. Instead, we let you **vibe test** models across curated examples:
59
+
60
+ 1. Predefined categories with images and prompts.
61
+ 2. Check any model on these examples.
62
+ 3. Explore the generations and judge for yourself.
63
+
64
+ This is not about scores — it's about *how it feels*.
65
+ """
66
+ )
67
+
68
  mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
69
 
70
  with gr.Column(visible=True) as model_mode:
data_utils.py CHANGED
@@ -12,7 +12,7 @@ def get_evaluation_data(ds):
12
  "id": ds[i]["ex_id"],
13
  "image_thumbnail": image_to_base64(thumbnail_img),
14
  "image_full": image_to_base64(img),
15
- "image_full_url": "https://sergiopaniego-vibe-testing-images.hf.space/image/" + str(i),
16
  "prompt": ds[i]["prompt"],
17
  "category": ds[i]["category"]
18
  })
 
12
  "id": ds[i]["ex_id"],
13
  "image_thumbnail": image_to_base64(thumbnail_img),
14
  "image_full": image_to_base64(img),
15
+ "image_full_url": "https://visionlmsftw-vibe-testing-images.hf.space/image/" + str(i),
16
  "prompt": ds[i]["prompt"],
17
  "category": ds[i]["category"]
18
  })