import gradio as gr
import gradio.themes.base
from utils import *
from data_utils import *
from datasets import load_dataset

ds = load_dataset("visionLMsftw/vibe-testing-samples", split="train")
evaluation_data = get_evaluation_data(ds)

ds_results = load_dataset("visionLMsftw/vibe-testing-results", split="train")
models = get_model_names(ds_results)
responses = get_responses(ds_results)
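
# Parameter counts (in billions) for the models surfaced in the app.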
model_params = {
    "Qwen/Qwen2.5-VL-32B-Instruct": 32,
    "google/gemma-3-27b-it": 27,
    "meta-llama/Llama-4-Maverick-17B-128E-Instruct": 17,
    "Qwen/Qwen2.5-VL-7B-Instruct": 7,
    "HuggingFaceTB/SmolVLM2-2.2B-Instruct": 2.2,
}
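
# Keep only models with at least `min_params` billion parameters and refresh the dropdown choices.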
def filter_models_by_param(min_params):
    filtered_models = [m for m, p in model_params.items() if p >= min_params]
    selected = filtered_models[0] if filtered_models else None
    return gr.update(choices=filtered_models, value=selected)
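
# Build a small HTML card with the provider, size, and Hub link of the selected model.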
def display_model_details(model_name):
    if model_name not in model_params:
        return "No info available."
    size = model_params[model_name]
    provider = model_name.split("/")[0] if "/" in model_name else "Unknown"
    link = f"https://huggingface.co/{model_name}"
    return f"""
    <div style="margin-top: 10px; font-size: 14px; display: flex; gap: 12px; align-items: center; flex-wrap: wrap;">
        <span><strong>Provider:</strong> {provider}</span>
        <span style="color: #999;">|</span>
        <span><strong>Size:</strong> {size}B</span>
        <span style="color: #999;">|</span>
        <span><strong>Link:</strong> <a href="{link}" target="_blank">{model_name}</a></span>
    </div>
    """
models = list(model_params.keys())
default_category = evaluation_data[0]["category"]
default_example_id = evaluation_data[0]["id"]
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# VLMVibeEval")
    gr.Markdown(
        """
A lightweight leaderboard for evaluating Vision Language Models (VLMs), based on vibes.

Traditional benchmarks often don't give a concrete signal for your use case, and models tend to saturate them. Instead, we let you **vibe test** models across curated, in-the-wild examples:

1. Browse predefined categories with images and prompts.
2. Check any model on these examples.
3. Explore the generations and judge for yourself, since different models have different styles and strengths.

This is not about scores; it's about *how it feels*. You can submit new models in the Community tab and we'll update the app shortly!
        """
    )

    mode = gr.Radio(
        ["View model-wise responses", "Compare model responses on a specific example"],
        label="Mode",
        value="View model-wise responses",
    )
    with gr.Column(visible=True) as model_mode:
        param_slider = gr.Slider(minimum=2, maximum=32, step=1, label="Minimum model parameters (B)")
        selected_model = gr.Dropdown(models, label="Choose model")
        model_info_box = gr.HTML()

        param_slider.change(filter_models_by_param, inputs=param_slider, outputs=selected_model)

        model_category = gr.Dropdown(
            choices=list(set(ex["category"] for ex in evaluation_data)),
            label="Category",
            value=default_category,
        )

        model_output = gr.HTML()
        current_index = gr.State(value=0)
        current_html = gr.State(value="")
        def load_initial(model, category):
            filtered_data = [ex for ex in evaluation_data if ex["category"] == category]
            html = display_model_responses_html(evaluation_data, responses, model, start_index=0, batch_size=5, category=category)
            has_more = 5 < len(filtered_data)
            model_info_html = display_model_details(model)
            return html, 5, html, gr.update(visible=has_more), model_info_html
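
        # Append the next batch of responses to the HTML accumulated so far.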
        def load_more(model, index, html_so_far, category):
            filtered_data = [ex for ex in evaluation_data if ex["category"] == category]
            new_html = display_model_responses_html(evaluation_data, responses, model, start_index=index, batch_size=5, category=category)
            updated_html = html_so_far + new_html
            new_index = index + 5
            has_more = new_index < len(filtered_data)
            return updated_html, new_index, updated_html, gr.update(visible=has_more)
        more_button = gr.Button("Load more")

        selected_model.change(
            load_initial,
            inputs=[selected_model, model_category],
            outputs=[model_output, current_index, current_html, more_button, model_info_box],
        )
        model_category.change(
            load_initial,
            inputs=[selected_model, model_category],
            outputs=[model_output, current_index, current_html, more_button, model_info_box],
        )
        demo.load(
            load_initial,
            inputs=[selected_model, model_category],
            outputs=[model_output, current_index, current_html, more_button, model_info_box],
        )
        more_button.click(
            load_more,
            inputs=[selected_model, current_index, current_html, model_category],
            outputs=[model_output, current_index, current_html, more_button],
        )
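
    # Second mode: compare every model's response on a single example.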
    with gr.Column(visible=False) as example_mode:
        category = gr.Dropdown(
            choices=list(set(ex["category"] for ex in evaluation_data)),
            label="Category",
            value=default_category,
        )
        example = gr.Dropdown(
            label="Example",
            value=default_example_id,
            choices=get_examples_by_category(evaluation_data, default_category),
        )
        example_display = gr.HTML()

        category.change(
            lambda c: gr.update(choices=get_examples_by_category(evaluation_data, c)),
            category,
            example,
        )
        example.change(
            fn=lambda ex_id: display_example_responses_html(evaluation_data, responses, models, ex_id),
            inputs=example,
            outputs=example_display,
        )
        demo.load(
            fn=lambda: display_example_responses_html(evaluation_data, responses, models, default_example_id),
            inputs=None,
            outputs=example_display,
        )
    def switch_mode(selected):
        return {
            model_mode: gr.update(visible=selected == "View model-wise responses"),
            example_mode: gr.update(visible=selected == "Compare model responses on a specific example"),
        }

    mode.change(switch_mode, mode, [model_mode, example_mode])
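
    # Modal overlay plus JS helpers (openImage / closeModal) for viewing images full-size.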
    gr.HTML(r"""
    <style>
        #image-modal {
            display: none;
            position: fixed;
            z-index: 999;
            left: 0; top: 0;
            width: 100%; height: 100%;
            background-color: rgba(0, 0, 0, 0.8);
            align-items: center;
            justify-content: center;
        }
        #image-modal img {
            max-width: 90%;
            max-height: 90%;
            border-radius: 8px;
            box-shadow: 0 0 20px rgba(255,255,255,0.3);
        }
        #image-modal .close {
            position: absolute;
            top: 20px; right: 30px;
            font-size: 32px;
            color: #fff;
            cursor: pointer;
            font-weight: bold;
        }
    </style>

    <div id="image-modal" onclick="closeModal(event)">
        <span class="close" onclick="closeModal(event)">&times;</span>
        <img id="modal-img" src="" alt="Enlarged Image" />
    </div>

    <script>
        function openImage(src) {
            const modal = document.getElementById('image-modal');
            const img = document.getElementById('modal-img');
            img.src = src;
            modal.style.display = 'flex';
        }

        function closeModal(event) {
            if (event.target.id === 'image-modal' || event.target.classList.contains('close')) {
                document.getElementById('image-modal').style.display = 'none';
            }
        }

        // Optional: close on ESC key
        document.addEventListener('keydown', function(e) {
            if (e.key === "Escape") {
                document.getElementById('image-modal').style.display = 'none';
            }
        });
    </script>
    """)

demo.launch()