# Hugging Face Space: SkalskiP/segment-anything-model-2
# Space status when captured: Runtime error
# File size: 4,338 bytes
# Commits touching this file: 5bd8d5b, fe42dd8, 61d1727, ec0b3c1, aa009f7, e4d42b3

from typing import Optional

import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator

from utils.models import load_models, CHECKPOINT_NAMES

# Markdown/HTML header for the demo page, rendered through gr.Markdown(MARKDOWN):
# a title, a row of link badges (GitHub, Colab, Roboflow blog post, YouTube) and
# a short description of SAM 2. This is runtime text shown to users.
MARKDOWN = """
# Segment Anything Model 2 🔥
<div>
    <a href="https://github.com/facebookresearch/segment-anything-2">
        <img src="https://badges.aleen42.com/src/github.svg" alt="GitHub" style="display:inline-block;">
    </a>
    <a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-segment-images-with-sam-2.ipynb">
        <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab" style="display:inline-block;">
    </a>
    <a href="https://blog.roboflow.com/what-is-segment-anything-2/">
        <img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="Roboflow" style="display:inline-block;">
    </a>
    <a href="https://www.youtube.com/watch?v=Dv003fTyO-Y">
        <img src="https://badges.aleen42.com/src/youtube.svg" alt="YouTube" style="display:inline-block;">
    </a>
</div>

Segment Anything Model 2 (SAM 2) is a foundation model designed to address promptable 
visual segmentation in both images and videos. The model extends its functionality to 
video by treating images as single-frame videos. Its design, a simple transformer 
architecture with streaming memory, enables real-time video processing. A 
model-in-the-loop data engine, which enhances the model and data through user 
interaction, was built to collect the SA-V dataset, the largest video segmentation 
dataset to date. SAM 2, trained on this extensive dataset, delivers robust performance 
across diverse tasks and visual domains.
"""
# Pre-filled example rows for the UI. Each row is
# [checkpoint name, image URL, points per side], matching the order of the
# input components passed to gr.Examples.
EXAMPLES = [
    [
        "tiny",
        "https://media.roboflow.com/notebooks/examples/dog-2.jpeg",
        16,
    ],
    [
        "small",
        "https://media.roboflow.com/notebooks/examples/dog-3.jpeg",
        16,
    ],
    [
        "large",
        "https://media.roboflow.com/notebooks/examples/dog-3.jpeg",
        16,
    ],
    [
        "large",
        "https://media.roboflow.com/notebooks/examples/dog-3.jpeg",
        64,
    ],
]

# Use the GPU when torch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

# Shared annotator; colours are looked up by detection index.
MASK_ANNOTATOR = sv.MaskAnnotator(
    color_lookup=sv.ColorLookup.INDEX,
)

# Loaded once at import time; `process` indexes this by checkpoint name.
MODELS = load_models(device=DEVICE)


def process(
    checkpoint_dropdown: str,
    image_input: Optional[Image.Image],
    points_per_side: int,
) -> Optional[Image.Image]:
    """Run SAM 2 automatic mask generation on an image and overlay the masks.

    Args:
        checkpoint_dropdown: Key used to pick a model out of ``MODELS``.
        image_input: Image to segment, or ``None`` when the caller has not
            supplied one (e.g. Submit pressed before uploading).
        points_per_side: Forwarded to ``SAM2AutomaticMaskGenerator`` as
            ``points_per_side``.

    Returns:
        The result of ``MASK_ANNOTATOR.annotate`` applied to ``image_input``,
        or ``None`` when ``image_input`` is ``None``.
    """
    # Without this guard a missing image crashes on `.convert` below; returning
    # None honours the Optional return type and leaves the output empty.
    if image_input is None:
        return None

    model = MODELS[checkpoint_dropdown]
    mask_generator = SAM2AutomaticMaskGenerator(
        model=model,
        # Defensive coercion: the value comes from a UI slider and is used as a
        # grid size, so make sure it is an integer.
        points_per_side=int(points_per_side),
    )

    # The generator is fed an RGB array; the annotation is drawn on the
    # original image object.
    image = np.array(image_input.convert("RGB"))
    sam_result = mask_generator.generate(image)
    detections = sv.Detections.from_sam(sam_result=sam_result)
    return MASK_ANNOTATOR.annotate(scene=image_input, detections=detections)


# Page layout: header, a controls row, an input/output image row, an examples
# row, and finally the wiring of the submit button to `process`.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)

    # Controls: checkpoint picker and sampling-density slider.
    with gr.Row():
        checkpoint_dropdown_component = gr.Dropdown(
            label="Checkpoint",
            info="Select a SAM2 checkpoint to use.",
            choices=CHECKPOINT_NAMES,
            value=CHECKPOINT_NAMES[0],
            interactive=True,
        )
        points_per_side_component = gr.Slider(
            label="Points per side",
            info="the number of points to be sampled along one side of the image.",
            minimum=16,
            maximum=64,
            step=16,
            value=16,
        )

    # Left column: upload + submit. Right column: annotated result.
    with gr.Row():
        with gr.Column():
            image_input_component = gr.Image(label="Upload image", type="pil")
            submit_button_component = gr.Button(variant="primary", value="Submit")
        with gr.Column():
            image_output_component = gr.Image(label="Image Output", type="pil")

    # The same components feed `process` from both the examples and the button.
    process_inputs = [
        checkpoint_dropdown_component,
        image_input_component,
        points_per_side_component,
    ]
    process_outputs = [image_output_component]

    with gr.Row():
        gr.Examples(
            examples=EXAMPLES,
            fn=process,
            inputs=process_inputs,
            outputs=process_outputs,
            run_on_click=True,
        )

    submit_button_component.click(
        fn=process,
        inputs=process_inputs,
        outputs=process_outputs,
    )

# Start the app; errors raised in handlers are shown in the UI.
demo.launch(debug=False, show_error=True, max_threads=1)