# VLMVibeEval / app.py
# merve's picture
# merve HF Staff
# Update description
# b22f824 verified
import gradio as gr
import gradio.themes.base
from utils import *
from data_utils import *
from datasets import load_dataset
# Load the `train` split of the sample dataset and convert it with the
# project helper `get_evaluation_data` (star-imported above). The result is
# indexed below as a sequence of records with "category" and "id" keys.
ds = load_dataset("visionLMsftw/vibe-testing-samples", split="train")
evaluation_data = get_evaluation_data(ds)
# Load the `train` split of the results dataset and derive the model names
# and responses from it with project helpers.
ds_results = load_dataset("visionLMsftw/vibe-testing-results", split="train")
# NOTE(review): `models` is reassigned further down from the keys of
# `model_params` before it is used, so this value is never read in this file.
models = get_model_names(ds_results)
responses = get_responses(ds_results)
# Model repository id -> size figure shown in the UI with a "B" suffix and
# compared against the "Minimum model parameters (B)" slider.
# Insertion order matters: the first entry that passes the slider filter
# becomes the pre-selected model.
model_params = {
    "Qwen/Qwen2.5-VL-32B-Instruct": 32,
    "google/gemma-3-27b-it": 27,
    "meta-llama/Llama-4-Maverick-17B-128E-Instruct": 17,
    "Qwen/Qwen2.5-VL-7B-Instruct": 7,
    "HuggingFaceTB/SmolVLM2-2.2B-Instruct": 2.2,
}
def filter_models_by_param(min_params):
    """Return a dropdown update listing models with at least `min_params` size.

    Args:
        min_params: Threshold compared against the values of `model_params`.

    Returns:
        A `gr.update(...)` whose choices are the matching model names (in
        `model_params` order) and whose value is the first match, or `None`
        when nothing matches.
    """
    eligible = [name for name, size in model_params.items() if size >= min_params]
    # Pre-select the first surviving model so the dropdown never keeps a
    # value that was just filtered out.
    first_match = next(iter(eligible), None)
    return gr.update(choices=eligible, value=first_match)
def display_model_details(model_name):
    """Build the HTML snippet describing a model (provider, size, hub link).

    Args:
        model_name: Model id, looked up in `model_params`.

    Returns:
        An HTML string with provider, size and a link to the model page, or
        the plain text "No info available." when the model is not listed in
        `model_params`.
    """
    try:
        size = model_params[model_name]
    except KeyError:
        return "No info available."

    # The provider is whatever precedes the first "/" in the model id.
    org, slash, _ = model_name.partition("/")
    provider = org if slash else "Unknown"
    link = f"https://huggingface.co/{model_name}"

    divider = '<span style="color: #999;">|</span>'
    # Leading and trailing empty entries reproduce the newline that opens and
    # closes the rendered snippet.
    parts = [
        "",
        '<div style="margin-top: 10px; font-size: 14px; display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">',
        f"<span><strong>Provider:</strong> {provider}</span>",
        divider,
        f"<span><strong>Size:</strong> {size}B</span>",
        divider,
        f'<span><strong>Link:</strong> <a href="{link}" target="_blank">{model_name}</a></span>',
        "</div>",
        "",
    ]
    return "\n".join(parts)
# Only models listed in `model_params` are offered in the UI; this replaces
# the list derived from the results dataset earlier in the module.
models = list(model_params)
# The first evaluation record seeds the default category and example.
_seed_example = evaluation_data[0]
default_category = _seed_example["category"]
default_example_id = _seed_example["id"]
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Labels of the two viewing modes; shared by the radio and `switch_mode`.
    MODEL_MODE_LABEL = "View model-wise responses"
    EXAMPLE_MODE_LABEL = "Compare model responses on a specific example"
    # Number of examples rendered per page in the model-wise view.
    PAGE_SIZE = 5
    # Category choices are computed once, in dataset order, so both category
    # dropdowns agree and the order is stable across restarts (iterating a
    # set of strings is not).
    category_choices = list(dict.fromkeys(ex["category"] for ex in evaluation_data))

    gr.Markdown("# VLMVibeEval")
    gr.Markdown(
        """
A lightweight leaderboard for evaluating Vision Language Models (VLMs) — based on vibes. 🌞
Traditional benchmarks don't give concrete signal for your use case and models are often saturated over them. Instead, we let you **vibe test** models across curated, in-the-wild examples:
1. Predefined categories with images and prompts.
2. Check any model on these examples.
3. Explore the generations and judge for yourself, as different models have different styles and strengths. 🗣️
This is not about scores — it's about *how it feels*. You can submit new models in the community tab and we'll shortly update the app! 🤗
"""
    )
    mode = gr.Radio([MODEL_MODE_LABEL, EXAMPLE_MODE_LABEL], label="Mode", value=MODEL_MODE_LABEL)

    # ---- Mode 1: browse one model's responses, paged by category ----------
    with gr.Column(visible=True) as model_mode:
        param_slider = gr.Slider(minimum=2, maximum=32, step=1, label="Minimum model parameters (B)")
        selected_model = gr.Dropdown(models, label="Choose model")
        model_info_box = gr.HTML()
        param_slider.change(filter_models_by_param, inputs=param_slider, outputs=selected_model)
        model_category = gr.Dropdown(
            choices=category_choices,
            label="Category",
            value=default_category,
        )
        model_output = gr.HTML()
        # Paging state: index of the next example to render and the HTML
        # accumulated so far.
        current_index = gr.State(value=0)
        current_html = gr.State(value="")

        def _count_in_category(category):
            """Return how many evaluation examples belong to `category`."""
            return sum(1 for ex in evaluation_data if ex["category"] == category)

        def load_initial(model, category):
            """Render the first page of `model`'s responses for `category`.

            Returns, in output order: the page HTML, the next start index,
            the accumulated HTML, a visibility update for the "Load more"
            button, and the model details HTML.
            """
            html = display_model_responses_html(
                evaluation_data,
                responses,
                model,
                start_index=0,
                batch_size=PAGE_SIZE,
                category=category,
            )
            has_more = PAGE_SIZE < _count_in_category(category)
            model_info_html = display_model_details(model)
            return html, PAGE_SIZE, html, gr.update(visible=has_more), model_info_html

        def load_more(model, index, html_so_far, category):
            """Append the next page of responses, starting at `index`.

            Returns, in output order: the combined HTML, the next start
            index, the accumulated HTML, and a visibility update for the
            "Load more" button.
            """
            new_html = display_model_responses_html(
                evaluation_data,
                responses,
                model,
                start_index=index,
                batch_size=PAGE_SIZE,
                category=category,
            )
            updated_html = html_so_far + new_html
            new_index = index + PAGE_SIZE
            has_more = new_index < _count_in_category(category)
            return updated_html, new_index, updated_html, gr.update(visible=has_more)

        more_button = gr.Button("Load more")

        # Changing the model, changing the category, and the initial page
        # load all reset the view to the first page.
        first_page_inputs = [selected_model, model_category]
        first_page_outputs = [model_output, current_index, current_html, more_button, model_info_box]
        for register in (selected_model.change, model_category.change, demo.load):
            register(load_initial, inputs=first_page_inputs, outputs=first_page_outputs)

        more_button.click(
            load_more,
            inputs=[selected_model, current_index, current_html, model_category],
            outputs=[model_output, current_index, current_html, more_button],
        )

    # ---- Mode 2: compare all models on one example ------------------------
    with gr.Column(visible=False) as example_mode:
        category = gr.Dropdown(
            choices=category_choices,
            label="Category",
            value=default_category,
        )
        example = gr.Dropdown(
            label="Example",
            value=default_example_id,
            choices=get_examples_by_category(evaluation_data, default_category),
        )
        example_display = gr.HTML()

        def update_example_choices(selected_category):
            """Refresh the example dropdown for `selected_category`.

            Also selects the first example of the new category so the
            dropdown does not keep showing an id from the previous one.
            """
            choices = list(get_examples_by_category(evaluation_data, selected_category))
            if not choices:
                # Nothing to select; only replace the choices.
                return gr.update(choices=choices)
            first = choices[0]
            # Gradio accepts choices as plain values or (name, value) pairs.
            if isinstance(first, (tuple, list)):
                first = first[1]
            return gr.update(choices=choices, value=first)

        def show_example(ex_id):
            """Render every model's response for the example `ex_id`."""
            return display_example_responses_html(evaluation_data, responses, models, ex_id)

        category.change(update_example_choices, category, example)
        example.change(fn=show_example, inputs=example, outputs=example_display)
        demo.load(fn=lambda: show_example(default_example_id), inputs=None, outputs=example_display)

    def switch_mode(selected):
        """Show the column matching the selected mode and hide the other."""
        return {
            model_mode: gr.update(visible=selected == MODEL_MODE_LABEL),
            example_mode: gr.update(visible=selected == EXAMPLE_MODE_LABEL),
        }

    mode.change(switch_mode, mode, [model_mode, example_mode])

    # Full-screen image modal (markup, styles and handlers).
    # NOTE(review): Gradio documents gr.HTML as rendering static HTML; confirm
    # that this <script> actually runs in the deployed Gradio version. If it
    # does not, the handlers need to be supplied through the Blocks `head`/`js`
    # options instead.
    gr.HTML(r"""
<style>
#image-modal {
display: none;
position: fixed;
z-index: 999;
left: 0; top: 0;
width: 100%; height: 100%;
background-color: rgba(0, 0, 0, 0.8);
align-items: center;
justify-content: center;
}
#image-modal img {
max-width: 90%;
max-height: 90%;
border-radius: 8px;
box-shadow: 0 0 20px rgba(255,255,255,0.3);
}
#image-modal .close {
position: absolute;
top: 20px; right: 30px;
font-size: 32px;
color: #fff;
cursor: pointer;
font-weight: bold;
}
</style>
<div id="image-modal" onclick="closeModal(event)">
<span class="close" onclick="closeModal(event)">&times;</span>
<img id="modal-img" src="" alt="Enlarged Image" />
</div>
<script>
function openImage(src) {
const modal = document.getElementById('image-modal');
const img = document.getElementById('modal-img');
img.src = src;
modal.style.display = 'flex';
}
function closeModal(event) {
if (event.target.id === 'image-modal' || event.target.classList.contains('close')) {
document.getElementById('image-modal').style.display = 'none';
}
}
// Optional: close on ESC key
document.addEventListener('keydown', function(e) {
if (e.key === "Escape") {
document.getElementById('image-modal').style.display = 'none';
}
});
</script>
""")

demo.launch()