from typing import Optional
import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
from utils.models import load_models, CHECKPOINT_NAMES
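# `utils.models` is a local helper module of this Space (not included here).
# Inferred contract, judging by how it is used below: `CHECKPOINT_NAMES` is a
# list of checkpoint identifiers and `load_models(device)` returns a dict
# mapping each name to a ready-to-use SAM 2 model. A hypothetical sketch built
# on the `sam2.build_sam.build_sam2` loader (config/checkpoint paths are
# assumptions):
#
#     from sam2.build_sam import build_sam2
#
#     CHECKPOINT_NAMES = ["tiny", "small", "large"]
#     CONFIGS = {"tiny": "sam2_hiera_t.yaml", ...}              # assumed mapping
#     CHECKPOINTS = {"tiny": "checkpoints/sam2_hiera_tiny.pt", ...}
#
#     def load_models(device):
#         return {
#             name: build_sam2(CONFIGS[name], CHECKPOINTS[name], device=device)
#             for name in CHECKPOINT_NAMES
#         }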
MARKDOWN = """
# Segment Anything Model 2 🔥
<div>
    <a href="https://github.com/facebookresearch/segment-anything-2">
        <img src="https://badges.aleen42.com/src/github.svg" alt="GitHub" style="display:inline-block;">
    </a>
    <a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-segment-images-with-sam-2.ipynb">
        <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab" style="display:inline-block;">
    </a>
    <a href="https://blog.roboflow.com/what-is-segment-anything-2/">
        <img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="Roboflow" style="display:inline-block;">
    </a>
    <a href="https://www.youtube.com/watch?v=Dv003fTyO-Y">
        <img src="https://badges.aleen42.com/src/youtube.svg" alt="YouTube" style="display:inline-block;">
    </a>
</div>
Segment Anything Model 2 (SAM 2) is a foundation model for promptable visual
segmentation in both images and videos; it handles images by treating them as
single-frame videos. Its design, a simple transformer architecture with
streaming memory, enables real-time video processing. To collect SA-V, the
largest video segmentation dataset to date, the authors built a
model-in-the-loop data engine that improves both the model and the data
through user interaction. Trained on this extensive dataset, SAM 2 delivers
robust performance across diverse tasks and visual domains.
"""
# Each example row supplies the two inputs of `process`:
# (checkpoint name, image URL).
EXAMPLES = [
    ["tiny", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"],
    ["small", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg"],
    ["large", "https://media.roboflow.com/notebooks/examples/dog-3.jpeg"],
]
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Color masks by detection index so adjacent segments get distinct colors.
MASK_ANNOTATOR = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
# Load every checkpoint once at startup so switching the dropdown does not
# reload weights on each request.
MODELS = load_models(device=DEVICE)
def process(checkpoint_dropdown: str, image_input: Optional[Image.Image]) -> Optional[Image.Image]:
    if image_input is None:
        return None
    # Look up the preloaded model for the selected checkpoint and wrap it in
    # the automatic (prompt-free) mask generator.
    sam2_model = MODELS[checkpoint_dropdown]
    mask_generator = SAM2AutomaticMaskGenerator(sam2_model)
    # SAM 2 expects an HWC uint8 RGB array.
    image = np.array(image_input.convert("RGB"))
    sam_result = mask_generator.generate(image)
    detections = sv.Detections.from_sam(sam_result=sam_result)
    # Draw one color-coded mask per detection on top of the original image.
    return MASK_ANNOTATOR.annotate(scene=image_input, detections=detections)
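# Example (hypothetical input): process("tiny", Image.open("dog.jpeg")) returns
# the uploaded image with one color-coded mask overlaid per detected segment.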
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        checkpoint_dropdown_component = gr.Dropdown(
            choices=CHECKPOINT_NAMES,
            value=CHECKPOINT_NAMES[0],
            label="Checkpoint",
            info="Select a SAM2 checkpoint to use.",
            interactive=True
        )
    with gr.Row():
        with gr.Column():
            image_input_component = gr.Image(type='pil', label='Upload image')
            submit_button_component = gr.Button(value='Submit', variant='primary')
        with gr.Column():
            image_output_component = gr.Image(type='pil', label='Image Output')
    with gr.Row():
        gr.Examples(
            fn=process,
            examples=EXAMPLES,
            inputs=[checkpoint_dropdown_component, image_input_component],
            outputs=[image_output_component],
            run_on_click=True
        )

    submit_button_component.click(
        fn=process,
        inputs=[checkpoint_dropdown_component, image_input_component],
        outputs=[image_output_component]
    )

demo.launch(debug=False, show_error=True, max_threads=1)
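# To run this Space locally (assuming the `sam2` package and checkpoints are
# installed), execute the script directly, e.g. `python app.py`, and open the
# local URL Gradio prints.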