from typing import Tuple

import gradio as gr
import numpy as np
import supervision as sv
import torch
from PIL import Image
from transformers import SamModel, SamProcessor

from utils.efficient_sam import load, inference_with_box

MARKDOWN = """
# EfficientSAM sv. SAM
This is a demo for comparing the performance of
[EfficientSAM](https://arxiv.org/abs/2312.00863) and
[SAM](https://arxiv.org/abs/2304.02643).
"""
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
SAM_MODEL = SamModel.from_pretrained("facebook/sam-vit-huge").to(DEVICE)
SAM_PROCESSOR = SamProcessor.from_pretrained("facebook/sam-vit-huge")
EFFICIENT_SAM_MODEL = load(device=DEVICE)
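
# NOTE: `load` and `inference_with_box` come from this repo's local
# `utils/efficient_sam.py` helper. Inferred from how they are used below (an
# assumption, not a documented API): `load` returns the EfficientSAM model on
# the given device, and `inference_with_box` takes an RGB image, a 2x2 array
# of box corners [[x_min, y_min], [x_max, y_max]], the model, and the device,
# and returns a single H x W boolean mask.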

MASK_ANNOTATOR = sv.MaskAnnotator(
    color=sv.Color.red(),
    color_lookup=sv.ColorLookup.INDEX)
BOX_ANNOTATOR = sv.BoundingBoxAnnotator(
    color=sv.Color.red(),
    color_lookup=sv.ColorLookup.INDEX)


def annotate_image(image: np.ndarray, detections: sv.Detections) -> np.ndarray:
    # The annotators are used on BGR frames here (OpenCV convention), so flip
    # the channel order on the way in and back to RGB on the way out.
    bgr_image = image[:, :, ::-1]
    annotated_bgr_image = MASK_ANNOTATOR.annotate(
        scene=bgr_image, detections=detections)
    annotated_bgr_image = BOX_ANNOTATOR.annotate(
        scene=annotated_bgr_image, detections=detections)
    return annotated_bgr_image[:, :, ::-1]


def efficient_sam_inference(
    image: np.ndarray,
    x_min: int,
    y_min: int,
    x_max: int,
    y_max: int
) -> np.ndarray:
    # EfficientSAM takes the box as two corner points:
    # [[x_min, y_min], [x_max, y_max]].
    box = np.array([[x_min, y_min], [x_max, y_max]])
    mask = inference_with_box(image, box, EFFICIENT_SAM_MODEL, DEVICE)
    # Add a leading axis: sv.Detections expects masks of shape (N, H, W).
    mask = mask[np.newaxis, ...]
    detections = sv.Detections(xyxy=sv.mask_to_xyxy(masks=mask), mask=mask)
    return annotate_image(image=image, detections=detections)


def sam_inference(
    image: np.ndarray,
    x_min: int,
    y_min: int,
    x_max: int,
    y_max: int
) -> np.ndarray:
    # SamProcessor expects boxes nested as (batch, boxes_per_image, 4), so a
    # single box for a single image is [[[x_min, y_min, x_max, y_max]]] and is
    # passed directly, without an extra level of wrapping.
    input_boxes = [[[x_min, y_min, x_max, y_max]]]
    inputs = SAM_PROCESSOR(
        Image.fromarray(image),
        input_boxes=input_boxes,
        return_tensors="pt"
    ).to(DEVICE)

    with torch.no_grad():
        outputs = SAM_MODEL(**inputs)

    # [0][0][0] selects the first image, first box, and the first of the three
    # candidate masks SAM returns per prompt.
    mask = SAM_PROCESSOR.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu()
    )[0][0][0].numpy()
    mask = mask[np.newaxis, ...]
    detections = sv.Detections(xyxy=sv.mask_to_xyxy(masks=mask), mask=mask)
    return annotate_image(image=image, detections=detections)


def inference(
    image: np.ndarray,
    x_min: int,
    y_min: int,
    x_max: int,
    y_max: int
) -> Tuple[np.ndarray, np.ndarray]:
    return (
        efficient_sam_inference(image, x_min, y_min, x_max, y_max),
        sam_inference(image, x_min, y_min, x_max, y_max)
    )


def clear(_: np.ndarray) -> Tuple[None, None]:
    return None, None
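
# Headless usage sketch (kept commented out; not part of the demo itself): the
# inference functions can be called directly on an RGB numpy array. The file
# name and box coordinates below are hypothetical placeholders.
# image = np.asarray(Image.open("example.jpg").convert("RGB"))
# efficient_sam_result, sam_result = inference(image, 100, 100, 400, 400)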

with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Tab(label="Box prompt"):
        with gr.Row():
            with gr.Column():
                input_image = gr.Image()
                with gr.Accordion(label="Box", open=False):
                    with gr.Row():
                        x_min_number = gr.Number(label="x_min")
                        y_min_number = gr.Number(label="y_min")
                        x_max_number = gr.Number(label="x_max")
                        y_max_number = gr.Number(label="y_max")
            efficient_sam_output_image = gr.Image(label="EfficientSAM")
            sam_output_image = gr.Image(label="SAM")
        with gr.Row():
            submit_button = gr.Button("Submit")
        gr.Examples(
            fn=inference,
            examples=[
                ['https://media.roboflow.com/efficient-sam/beagle.jpeg', 69, 26, 625, 704],
                ['https://media.roboflow.com/efficient-sam/corgi.jpg', 801, 510, 1782, 993],
                ['https://media.roboflow.com/efficient-sam/horses.jpg', 814, 696, 1523, 1183],
                ['https://media.roboflow.com/efficient-sam/bears.jpg', 653, 874, 1173, 1229]
            ],
            inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
            outputs=[efficient_sam_output_image, sam_output_image],
        )

    submit_button.click(
        efficient_sam_inference,
        inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
        outputs=efficient_sam_output_image
    )
    submit_button.click(
        sam_inference,
        inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
        outputs=sam_output_image
    )
    input_image.change(
        clear,
        inputs=input_image,
        outputs=[efficient_sam_output_image, sam_output_image]
    )

demo.launch(debug=False, show_error=True)