Spaces:
Running
Running
UI improvement (#8)
Browse files- UI improvements (066e55e008b09de3f22f1b2ed7a8b0cccf9e5c3d)
- app.py +20 -5
- data_utils.py +1 -1
app.py
CHANGED
@@ -34,10 +34,12 @@ def display_model_details(model_name):
|
|
34 |
link = f"https://huggingface.co/{model_name}"
|
35 |
|
36 |
return f"""
|
37 |
-
<div style="margin-top: 10px; font-size:
|
38 |
-
<
|
39 |
-
<
|
40 |
-
<
|
|
|
|
|
41 |
</div>
|
42 |
"""
|
43 |
|
@@ -49,7 +51,20 @@ default_example_id = evaluation_data[0]["id"]
|
|
49 |
|
50 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
51 |
gr.Markdown("# VLMVibeEval")
|
52 |
-
gr.Markdown(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
|
54 |
|
55 |
with gr.Column(visible=True) as model_mode:
|
|
|
34 |
link = f"https://huggingface.co/{model_name}"
|
35 |
|
36 |
return f"""
|
37 |
+
<div style="margin-top: 10px; font-size: 14px; display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">
|
38 |
+
<span><strong>Provider:</strong> {provider}</span>
|
39 |
+
<span style="color: #999;">|</span>
|
40 |
+
<span><strong>Size:</strong> {size}B</span>
|
41 |
+
<span style="color: #999;">|</span>
|
42 |
+
<span><strong>Link:</strong> <a href="{link}" target="_blank">{model_name}</a></span>
|
43 |
</div>
|
44 |
"""
|
45 |
|
|
|
51 |
|
52 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
53 |
gr.Markdown("# VLMVibeEval")
|
54 |
+
gr.Markdown(
|
55 |
+
"""
|
56 |
+
A lightweight leaderboard for evaluating Vision Language Models (VLMs) — based on vibes.
|
57 |
+
|
58 |
+
Traditional benchmarks can be misleading due to overlap with training data. Instead, we let you **vibe test** models across curated examples:
|
59 |
+
|
60 |
+
1. Predefined categories with images and prompts.
|
61 |
+
2. Check any model on these examples.
|
62 |
+
3. Explore the generations and judge for yourself.
|
63 |
+
|
64 |
+
This is not about scores — it's about *how it feels*.
|
65 |
+
"""
|
66 |
+
)
|
67 |
+
|
68 |
mode = gr.Radio(["View model-wise responses", "Compare model responses on a specific example"], label="Mode", value="View model-wise responses")
|
69 |
|
70 |
with gr.Column(visible=True) as model_mode:
|
data_utils.py
CHANGED
@@ -12,7 +12,7 @@ def get_evaluation_data(ds):
|
|
12 |
"id": ds[i]["ex_id"],
|
13 |
"image_thumbnail": image_to_base64(thumbnail_img),
|
14 |
"image_full": image_to_base64(img),
|
15 |
-
"image_full_url": "https://
|
16 |
"prompt": ds[i]["prompt"],
|
17 |
"category": ds[i]["category"]
|
18 |
})
|
|
|
12 |
"id": ds[i]["ex_id"],
|
13 |
"image_thumbnail": image_to_base64(thumbnail_img),
|
14 |
"image_full": image_to_base64(img),
|
15 |
+
"image_full_url": "https://visionlmsftw-vibe-testing-images.hf.space/image/" + str(i),
|
16 |
"prompt": ds[i]["prompt"],
|
17 |
"category": ds[i]["category"]
|
18 |
})
|