# InsectSAM / app.py
# Author: Martin Tomov
# Last commit: "Update app.py" (6e9745a, verified)
# File size: 10.1 kB
import os
os.system('pip install gradio==4.29.0') # as gradio==4.29.0 doesn't work in requirements.txt
import random
from dataclasses import dataclass
from typing import Any, List, Dict, Optional, Union, Tuple
import cv2
import torch
import requests
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
import gradio as gr
import spaces
@dataclass
class BoundingBox:
    """Rectangle stored as its minimum and maximum x/y coordinates."""

    # Smallest x coordinate of the rectangle.
    xmin: int
    # Smallest y coordinate of the rectangle.
    ymin: int
    # Largest x coordinate of the rectangle.
    xmax: int
    # Largest y coordinate of the rectangle.
    ymax: int

    @property
    def xyxy(self) -> List[float]:
        """Return the coordinates as the list ``[xmin, ymin, xmax, ymax]``."""
        corners = (self.xmin, self.ymin, self.xmax, self.ymax)
        return list(corners)
@dataclass
class DetectionResult:
    """A single detection: confidence score, text label, box and optional mask."""

    score: float
    label: str
    box: BoundingBox
    # Segmentation mask for this detection; ``None`` until one is attached.
    mask: Optional[np.ndarray] = None

    @classmethod
    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
        """Build an instance from a dict holding ``score``, ``label`` and ``box``.

        ``detection_dict['box']`` must itself be a mapping with the keys
        ``xmin``, ``ymin``, ``xmax`` and ``ymax``. The mask is left at ``None``.
        """
        score = detection_dict['score']
        label = detection_dict['label']
        coords = {
            edge: detection_dict['box'][edge]
            for edge in ('xmin', 'ymin', 'xmax', 'ymax')
        }
        return cls(score=score, label=label, box=BoundingBox(**coords))
def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
    """Draw the box, caption and mask outline of every detection on a copy of ``image``.

    The input is treated as RGB: it is converted to BGR for drawing and the
    result is converted back to RGB before being returned.
    """
    canvas = image
    if isinstance(image, Image.Image):
        canvas = np.array(image)
    canvas = cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR)

    for det in detection_results:
        bbox = det.box
        caption = f'{det.label}: {det.score:.2f}'
        # One random colour per detection, shared by box, caption and outline.
        colour = np.random.randint(0, 256, size=3).tolist()

        cv2.rectangle(canvas, (bbox.xmin, bbox.ymin), (bbox.xmax, bbox.ymax), colour, 2)
        cv2.putText(
            canvas,
            caption,
            (bbox.xmin, bbox.ymin - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            colour,
            2,
        )

        if det.mask is None:
            continue
        # Scale the mask to 0/255 before extracting its outer contours.
        scaled_mask = (det.mask * 255).astype(np.uint8)
        outlines, _ = cv2.findContours(scaled_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(canvas, outlines, -1, colour, 2)

    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
    """Return ``image`` with ``detections`` drawn on it (delegates to ``annotate``)."""
    return annotate(image, detections)
def load_image(image: Union[str, Image.Image]) -> Image.Image:
    """Return ``image`` as an RGB PIL image.

    Args:
        image: A PIL image, a local file path, or a string starting with
            ``"http"`` that is fetched over the network.

    Returns:
        A new PIL image in ``"RGB"`` mode.

    Raises:
        requests.RequestException: If the download times out or the server
            answers with an error status.
    """
    if not isinstance(image, str):
        return image.convert("RGB")

    if image.startswith("http"):
        # The timeout keeps an unresponsive host from blocking the request
        # forever; the context manager releases the streamed connection.
        with requests.get(image, stream=True, timeout=30) as response:
            # Fail loudly on 4xx/5xx instead of handing an error page to PIL.
            response.raise_for_status()
            # Let urllib3 undo gzip/deflate content encoding before PIL reads the bytes.
            response.raw.decode_content = True
            return Image.open(response.raw).convert("RGB")

    # ``convert`` returns a fully loaded copy, so the file handle can be closed.
    with Image.open(image) as opened:
        return opened.convert("RGB")
def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
    """Collect each detection's ``xyxy`` box and wrap the list in a one-element outer list."""
    return [[detection.box.xyxy for detection in detection_results]]
def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
    """Return the external contour of ``mask`` with the largest area.

    An empty array is returned when ``cv2.findContours`` finds no contour.
    """
    found, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(found) > 0:
        return max(found, key=cv2.contourArea)
    return np.array([])
def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
    """Collapse a 4-D mask tensor into a list of binary ``uint8`` masks.

    Axis 1 of ``masks`` is averaged away and a pixel is set to 1 when that
    average is greater than zero, i.e. when any entry along axis 1 is set.
    (Presumably axis 1 holds the model's alternative mask candidates; confirm
    against the segmenter's output layout.)

    Args:
        masks: Tensor of rank 4 whose last two axes are the image height and width.
        polygon_refinement: When true, each mask is replaced by the filled
            polygon of its largest external contour.

    Returns:
        One ``uint8`` array of zeros and ones per entry along axis 0.
    """
    averaged = masks.cpu().float().permute(0, 2, 3, 1).mean(dim=-1)
    # Threshold BEFORE casting to uint8: casting first truncates fractional
    # averages (e.g. 1/3) to 0 and silently drops those pixels.
    binary = (averaged > 0).numpy().astype(np.uint8)

    if polygon_refinement:
        for idx, mask in enumerate(binary):
            polygon = mask_to_polygon(mask)
            if polygon.size == 0:
                # No contour was found; cv2.fillPoly rejects an empty point
                # set, so the mask is left unchanged.
                continue
            binary[idx] = cv2.fillPoly(np.zeros(mask.shape, dtype=np.uint8), [polygon], 1)

    return list(binary)
@spaces.GPU
def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[DetectionResult]:
    """Run zero-shot object detection on ``image`` for the given text labels.

    Args:
        image: Image to search.
        labels: Candidate label strings; a trailing ``"."`` is appended to any
            label that does not already end with one.
        threshold: Score threshold forwarded to the detection pipeline.
        detector_id: Model identifier; defaults to
            ``"IDEA-Research/grounding-dino-base"`` when empty or ``None``.

    Returns:
        One ``DetectionResult`` per pipeline result (masks are not set here).
    """
    detector_id = detector_id or "IDEA-Research/grounding-dino-base"
    # Fall back to the CPU so the function still works on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device=device)
    labels = [label if label.endswith(".") else label + "." for label in labels]
    results = object_detector(image, candidate_labels=labels, threshold=threshold)
    return [DetectionResult.from_dict(result) for result in results]
@spaces.GPU
def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
    """Attach a segmentation mask to every detection, prompted by its box.

    Args:
        image: Image the detections were found in.
        detection_results: Detections whose boxes are used as prompts; their
            ``mask`` attribute is overwritten in place.
        polygon_refinement: Forwarded to ``refine_masks``.
        segmenter_id: Model identifier; defaults to ``"martintmv/InsectSAM"``
            when empty or ``None``.

    Returns:
        The same ``detection_results`` list, with masks assigned.
    """
    if not detection_results:
        # With nothing detected there are no box prompts, and the processor
        # cannot handle an empty box list; skip loading the model altogether.
        return detection_results

    segmenter_id = segmenter_id or "martintmv/InsectSAM"
    # Fall back to the CPU so the function still works on machines without CUDA.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(device)
    processor = AutoProcessor.from_pretrained(segmenter_id)

    boxes = get_boxes(detection_results)
    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to(device)
    # Inference only: do not record an autograd graph.
    with torch.no_grad():
        outputs = segmentator(**inputs)

    # Only one image is processed, hence the first (and only) batch entry.
    masks = processor.post_process_masks(
        masks=outputs.pred_masks,
        original_sizes=inputs.original_sizes,
        reshaped_input_sizes=inputs.reshaped_input_sizes,
    )[0]
    masks = refine_masks(masks, polygon_refinement)

    for detection_result, mask in zip(detection_results, masks):
        detection_result.mask = mask
    return detection_results
def grounded_segmentation(image: Union[Image.Image, str], labels: List[str], threshold: float = 0.3, polygon_refinement: bool = False, detector_id: Optional[str] = None, segmenter_id: Optional[str] = None) -> Tuple[np.ndarray, List[DetectionResult]]:
    """Load ``image``, detect the labelled objects and segment each detection.

    Returns the loaded image as a NumPy array together with the detections
    produced by ``detect`` and then passed through ``segment``.
    """
    rgb_image = load_image(image)
    found = detect(rgb_image, labels, threshold, detector_id)
    with_masks = segment(rgb_image, found, polygon_refinement, segmenter_id)
    return np.array(rgb_image), with_masks
def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
    """Return ``(xmin, ymin, xmax, ymax)`` of the non-zero entries of a 2-D ``mask``.

    The maxima are the largest occupied indices (inclusive). A mask without
    any non-zero entry makes the underlying ``min`` call raise ``ValueError``.
    """
    rows, cols = np.nonzero(mask)
    left = cols.min()
    top = rows.min()
    right = cols.max()
    bottom = rows.max()
    return left, top, right, bottom
def extract_and_paste_insect(original_image: np.ndarray, detection: "DetectionResult", background: np.ndarray) -> None:
    """Copy the masked pixels of one detection from ``original_image`` onto ``background``.

    The tight rectangle around the mask's non-zero pixels is cut out of
    ``original_image`` and pasted so that its top-left corner lands on
    ``(detection.box.xmin, detection.box.ymin)``. Only pixels where the mask
    is non-zero are written; everything else in ``background`` is untouched.

    Args:
        original_image: Source pixels, indexed as ``[row, column, ...]``.
        detection: Object providing ``mask`` (2-D array or ``None``) and ``box``.
        background: Destination array, modified in place.

    Returns:
        None. A detection whose mask is ``None`` or all zero is ignored, and
        any part of the paste falling outside ``background`` is clipped.
    """
    mask = detection.mask
    if mask is None:
        return
    rows, cols = np.nonzero(mask)
    if rows.size == 0:
        # Empty mask: nothing to paste (min/max of an empty array would raise).
        return

    # The maxima are inclusive indices, so the slice end needs +1; without it
    # the last row and column of the insect are dropped.
    top, bottom = int(rows.min()), int(rows.max()) + 1
    left, right = int(cols.min()), int(cols.max()) + 1
    insect_crop = original_image[top:bottom, left:right]
    # A boolean mask is used instead of cv2.bitwise_not: inverting a 0/1 uint8
    # mask yields 255/254, both non-zero, so the background was never masked
    # out and got added on top of the insect pixels.
    inside = mask[top:bottom, left:right] > 0

    x_offset, y_offset = detection.box.xmin, detection.box.ymin
    # Clip the destination rectangle to the background so that a crop hanging
    # over an edge does not produce mismatched shapes.
    x_start, y_start = max(x_offset, 0), max(y_offset, 0)
    x_end = min(x_offset + inside.shape[1], background.shape[1])
    y_end = min(y_offset + inside.shape[0], background.shape[0])
    if x_start >= x_end or y_start >= y_end:
        return

    src_rows = slice(y_start - y_offset, y_end - y_offset)
    src_cols = slice(x_start - x_offset, x_end - x_offset)
    visible = inside[src_rows, src_cols]
    # ``region`` is a view, so the assignment writes straight into ``background``.
    region = background[y_start:y_end, x_start:x_end]
    region[visible] = insect_crop[src_rows, src_cols][visible]
def create_yellow_background_with_insects(image: np.ndarray, detections: List["DetectionResult"]) -> np.ndarray:
    """Return a yellow canvas the size of ``image`` with every masked insect pasted on it.

    Args:
        image: Source image; its first two dimensions give the canvas size and
            its pixels are copied for each detection that has a mask.
        detections: Detections to paste; those whose ``mask`` is ``None`` are skipped.

    Returns:
        A new ``uint8`` array of shape ``(height, width, 3)``.
    """
    height, width = image.shape[0], image.shape[1]
    # The image comes from PIL and is displayed by Gradio, both RGB, so yellow
    # is (255, 255, 0); (0, 255, 255) is yellow only in BGR and shows as cyan here.
    yellow_background = np.full((height, width, 3), (255, 255, 0), dtype=np.uint8)
    for detection in detections:
        if detection.mask is not None:
            extract_and_paste_insect(image, detection, yellow_background)
    return yellow_background
def draw_classification_boxes(image_with_insects, detections):
    """Draw each detection's box and a filled caption tag directly above it.

    ``image_with_insects`` is drawn on in place and also returned. Every
    detection gets its own random colour for the box and the tag background;
    the caption text itself is white.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    stroke = 2

    for det in detections:
        bbox = det.box
        caption = f"{det.label}: {det.score:.2f}"
        colour = np.random.randint(0, 256, size=3).tolist()

        cv2.rectangle(image_with_insects, (bbox.xmin, bbox.ymin), (bbox.xmax, bbox.ymax), colour, 2)

        # Size the tag so the caption fits exactly, baseline included.
        (caption_width, caption_height), baseline = cv2.getTextSize(caption, font, font_scale, stroke)
        tag_top_left = (bbox.xmin, bbox.ymin - caption_height - baseline)
        tag_bottom_right = (bbox.xmin + caption_width, bbox.ymin)
        cv2.rectangle(image_with_insects, tag_top_left, tag_bottom_right, colour, thickness=cv2.FILLED)

        cv2.putText(
            image_with_insects,
            caption,
            (bbox.xmin, bbox.ymin - baseline),
            font,
            font_scale,
            (255, 255, 255),
            stroke,
        )

    return image_with_insects
def plot_detections_plotly(image: np.ndarray, detections: List[DetectionResult]) -> str:
    """Render ``image`` and its detections as a Plotly figure saved to an HTML file.

    For every detection the figure shows the box rectangle and a caption at
    the box centre; detections that have a mask with at least one contour also
    get that contour as a filled outline.

    Args:
        image: Image shown as the figure background.
        detections: Detections to draw.

    Returns:
        Path of the written HTML file (always ``/tmp/plotly_image.html``).
    """
    from plotly import graph_objects as go
    import plotly.express as px

    fig = px.imshow(image)
    class_colors = {i: f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})' for i in range(len(detections))}

    for idx, detection in enumerate(detections):
        label = detection.label
        box = detection.box
        score = detection.score
        mask = detection.mask

        if mask is not None:
            # OpenCV contours have shape (N, 1, 2); flatten to (N, 2) so the
            # x and y columns can be addressed directly. Indexing each point
            # with [1] on the raw contour raised IndexError.
            polygon = mask_to_polygon(mask).reshape(-1, 2)
            if len(polygon) > 0:
                xs = polygon[:, 0].tolist()
                ys = polygon[:, 1].tolist()
                fig.add_trace(go.Scatter(
                    # Repeat the first point to close the outline.
                    x=xs + xs[:1],
                    y=ys + ys[:1],
                    mode='lines',
                    line=dict(color=class_colors[idx], width=2),
                    fill='toself',
                    name=f"{label}: {score:.2f}"
                ))

        xmin, ymin, xmax, ymax = box.xyxy
        fig.add_shape(
            type="rect",
            x0=xmin, y0=ymin, x1=xmax, y1=ymax,
            line=dict(color=class_colors[idx])
        )
        fig.add_annotation(
            x=(xmin + xmax) // 2, y=(ymin + ymax) // 2,
            text=f"{label}: {score:.2f}",
        )

    fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False))
    # NOTE(review): the path is fixed, so overlapping calls overwrite each
    # other's output; confirm whether concurrent requests are expected.
    file_path = "/tmp/plotly_image.html"
    fig.write_html(file_path)
    return file_path
def process_image(image):
    """Run detection and segmentation on ``image`` and build the three interface outputs.

    Args:
        image: Input accepted by ``grounded_segmentation`` (the interface
            passes a PIL image).

    Returns:
        A tuple ``(annotated_image, yellow_background_with_boxes, plot_markup)``:
        the annotated input, the yellow canvas with pasted insects and labelled
        boxes, and an HTML snippet embedding the interactive Plotly figure.
    """
    import html

    labels = ["insect"]
    original_image, detections = grounded_segmentation(image, labels, threshold=0.3, polygon_refinement=True)
    annotated_image = plot_detections(original_image, detections)
    yellow_background_with_insects = create_yellow_background_with_insects(original_image, detections)
    # Draw on a copy so the canvas without boxes stays untouched.
    yellow_background_with_boxes = draw_classification_boxes(yellow_background_with_insects.copy(), detections)

    plotly_image_path = plot_detections_plotly(original_image, detections)
    # An HTML output renders the string it is given, so returning the file
    # path would only display the path text. Embed the saved document in an
    # iframe instead; ``srcdoc`` lets the figure's scripts run in isolation.
    with open(plotly_image_path, encoding="utf-8") as plot_file:
        plot_document = plot_file.read()
    plot_markup = (
        f'<iframe srcdoc="{html.escape(plot_document, quote=True)}" '
        'width="100%" height="600" style="border:none;"></iframe>'
    )
    return annotated_image, yellow_background_with_boxes, plot_markup
# Single-function interface: one PIL image in; two NumPy images and one HTML
# panel out. The server is started as soon as this module is executed.
demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="numpy"),
        gr.Image(type="numpy"),
        gr.HTML(),
    ],
    title="🐞 InsectSAM + GroundingDINO Inference",
)
demo.launch()