import gradio as gr
import torch
from transformers import pipeline
import spaces

# Pre-defined vision models. Each of these ships with an image-classification head;
# contrastive models such as CLIP would need the zero-shot-image-classification
# pipeline instead, so they are not listed here.
PREDEFINED_MODELS = {
    "ViT Base (google/vit-base-patch16-224)": "google/vit-base-patch16-224",
    "DeiT Base (facebook/deit-base-distilled-patch16-224)": "facebook/deit-base-distilled-patch16-224",
    "ResNet-50 (microsoft/resnet-50)": "microsoft/resnet-50"
}
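# Any other Hub checkpoint with an image-classification head can be supplied through
# the "Custom" option in the UI below.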

@spaces.GPU
def compare_vision_models(image, model1_choice, model1_custom, model2_choice, model2_custom):
    """
    For each model selection, use the pre-defined model identifier unless the user selects "Custom" and enters an identifier.
    Then create an image-classification pipeline for each model and run inference on the provided image.
    """
    # Determine the model names to use:
    model1_name = (
        PREDEFINED_MODELS.get(model1_choice, model1_custom)
        if model1_choice != "Custom" else model1_custom
    )
    model2_name = (
        PREDEFINED_MODELS.get(model2_choice, model2_custom)
        if model2_choice != "Custom" else model2_custom
    )
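
    # Guard against missing inputs: without an image or a model identifier, the
    # pipeline calls below fail with opaque errors, so return a short message instead.
    if image is None:
        msg = "Please upload an image first."
        return msg, msg
    if not model1_name or not model2_name:
        msg = "Please select a model (or enter a custom model ID) for both sides."
        return msg, msg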
    
    # On ZeroGPU hardware the @spaces.GPU decorator attaches a GPU for the duration of
    # this call, so CUDA availability is the reliable signal for device placement here
    # (it also covers regular GPU Spaces); otherwise fall back to CPU.
    device = 0 if torch.cuda.is_available() else -1

    # Create an image-classification pipeline for each model and run inference.
    # Loading can fail for custom IDs that don't exist or don't support image
    # classification, so surface the error in the output boxes instead of crashing.
    try:
        classifier1 = pipeline("image-classification", model=model1_name, device=device)
        classifier2 = pipeline("image-classification", model=model2_name, device=device)
        preds1 = classifier1(image)
        preds2 = classifier2(image)
    except Exception as e:
        return f"Error: {e}", f"Error: {e}"

    # Format the predictions as text (each line shows the predicted label and its confidence score)
    result1 = "\n".join([f"{pred['label']}: {pred['score']:.3f}" for pred in preds1])
    result2 = "\n".join([f"{pred['label']}: {pred['score']:.3f}" for pred in preds2])
    
    return result1, result2

# Build the Gradio interface using Blocks.
with gr.Blocks(title="Vision Model Comparison Tool") as demo:
    gr.Markdown("## Vision Model Comparison Tool\nSelect two Hugging Face vision models to compare their outputs side-by-side!")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Model 1")
            model1_choice = gr.Dropdown(
                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
                label="Select a pre-defined model or 'Custom'"
            )
            model1_custom = gr.Textbox(
                label="Custom Hugging Face model ID (used when 'Custom' is selected)",
                placeholder="e.g., username/model_name"
            )
        with gr.Column():
            gr.Markdown("### Model 2")
            model2_choice = gr.Dropdown(
                choices=list(PREDEFINED_MODELS.keys()) + ["Custom"],
                label="Select a pre-defined model or 'Custom'"
            )
            model2_custom = gr.Textbox(
                label="Custom Hugging Face model ID (used when 'Custom' is selected)",
                placeholder="e.g., username/model_name"
            )
    image_input = gr.Image(label="Input Image", type="pil")
    compare_btn = gr.Button("Compare Models")
    with gr.Row():
        output1 = gr.Textbox(label="Model 1 Output")
        output2 = gr.Textbox(label="Model 2 Output")

    compare_btn.click(
        fn=compare_vision_models,
        inputs=[image_input, model1_choice, model1_custom, model2_choice, model2_custom],
        outputs=[output1, output2]
    )

demo.launch()
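
# Note: on a Hugging Face Space, this file is typically accompanied by a requirements.txt
# listing at least gradio, transformers, and torch (and spaces, if it is not already
# provided by the Space runtime). That is a sketch of a typical setup, not a file taken
# from this repository.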