# Hugging Face Space -- status: Running on Zero
import os
import random
import subprocess
import sys
from dataclasses import dataclass
from typing import Any, List, Dict, Optional, Union, Tuple

# gradio==4.29.0 doesn't work in requirements.txt, so it is installed at
# start-up instead. This has to run before ``import gradio`` below.
# ``sys.executable -m pip`` installs into the interpreter running this script,
# and passing an argument list avoids going through a shell. A failed install
# is not fatal here (check=False), matching the previous best-effort call.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "gradio==4.29.0"],
    check=False,
)

import cv2
import torch
import requests
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
import gradio as gr
import spaces
@dataclass
class BoundingBox:
    """Axis-aligned box given by its top-left and bottom-right corners.

    The class is a dataclass so it can be built with keyword arguments, and
    ``xyxy`` is a property because callers read it as an attribute
    (``box.xyxy``) rather than calling it.
    """

    xmin: int
    ymin: int
    xmax: int
    ymax: int

    @property
    def xyxy(self) -> List[float]:
        """Return the corners as ``[xmin, ymin, xmax, ymax]``."""
        return [self.xmin, self.ymin, self.xmax, self.ymax]
@dataclass
class DetectionResult:
    """A single detection: confidence score, label, box and optional mask.

    ``mask`` starts out as ``None`` and is filled in later by the
    segmentation step.
    """

    score: float
    label: str
    box: BoundingBox
    mask: Optional[np.ndarray] = None

    @classmethod
    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
        """Build a result from a detector output dictionary.

        Args:
            detection_dict: Mapping with the keys ``'score'``, ``'label'`` and
                ``'box'``; ``'box'`` is itself a mapping with ``'xmin'``,
                ``'ymin'``, ``'xmax'`` and ``'ymax'``.

        Returns:
            A ``DetectionResult`` whose ``mask`` is ``None``.

        Raises:
            KeyError: If any of the keys above is missing.
        """
        box = detection_dict['box']
        return cls(
            score=detection_dict['score'],
            label=detection_dict['label'],
            box=BoundingBox(
                xmin=box['xmin'],
                ymin=box['ymin'],
                xmax=box['xmax'],
                ymax=box['ymax'],
            ),
        )
def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
    """Draw each detection's box, caption and mask outline on the image.

    Args:
        image: RGB picture as a PIL image or an array.
        detection_results: Detections to draw. A detection whose ``mask`` is
            ``None`` gets a box and caption only.

    Returns:
        The annotated picture as an RGB array. Drawing happens on the array
        produced by the initial RGB->BGR conversion.
    """
    rgb = np.array(image) if isinstance(image, Image.Image) else image
    canvas = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    for det in detection_results:
        bbox = det.box
        # One random colour per detection, shared by box, caption and outline.
        colour = np.random.randint(0, 256, size=3).tolist()

        cv2.rectangle(canvas, (bbox.xmin, bbox.ymin), (bbox.xmax, bbox.ymax), colour, 2)
        caption = f'{det.label}: {det.score:.2f}'
        cv2.putText(
            canvas,
            caption,
            (bbox.xmin, bbox.ymin - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            colour,
            2,
        )

        if det.mask is None:
            continue

        # Scale the mask to 0/255 before tracing its outer contours.
        scaled = (det.mask * 255).astype(np.uint8)
        outlines, _ = cv2.findContours(scaled, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(canvas, outlines, -1, colour, 2)

    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)
def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
    """Return the image with all detections drawn on it (delegates to ``annotate``)."""
    return annotate(image, detections)
def load_image(image: Union[str, Image.Image]) -> Image.Image:
    """Return ``image`` as an RGB PIL image.

    Args:
        image: An ``http://`` / ``https://`` URL, a local file path, or an
            object with a ``convert`` method (a PIL image).

    Returns:
        The picture converted to RGB mode.

    Raises:
        requests.RequestException: If the download fails, times out or the
            server answers with an error status.
    """
    if not isinstance(image, str):
        return image.convert("RGB")

    # Match real URL schemes only, so a local file whose name merely starts
    # with "http" is still opened from disk.
    if image.startswith(("http://", "https://")):
        # Without a timeout a stalled server would block the request forever.
        response = requests.get(image, stream=True, timeout=30)
        # Fail on 4xx/5xx instead of handing an error page to the decoder.
        response.raise_for_status()
        # ``raw`` bypasses content decoding unless asked (gzip/deflate bodies).
        response.raw.decode_content = True
        return Image.open(response.raw).convert("RGB")

    return Image.open(image).convert("RGB")
def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
    """Collect every detection's ``box.xyxy`` into a single-element outer list.

    The result has the form ``[[box_0, box_1, ...]]``: one outer entry holding
    all boxes, each box being whatever ``box.xyxy`` yields.
    """
    return [[det.box.xyxy for det in detection_results]]
def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
    """Return the largest external contour of ``mask``.

    Contours are found with ``cv2.findContours`` (external only, simple chain
    approximation) and ranked by ``cv2.contourArea``.

    Returns:
        The winning contour exactly as OpenCV returned it, or an empty array
        when the mask has no contour at all.
    """
    outlines, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if outlines:
        return max(outlines, key=cv2.contourArea)
    return np.array([])
def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
    """Collapse a 4-D mask tensor into one binary 2-D mask per leading entry.

    Dimension 1 of ``masks`` is moved to the end and averaged away, the result
    is cast to ``uint8`` and binarised to 0/1.

    Args:
        masks: 4-D tensor; the first dimension indexes the masks and the
            second is the one that gets averaged out.
        polygon_refinement: When true, each mask is replaced by the filled
            polygon of its largest external contour.

    Returns:
        A list of 2-D ``uint8`` arrays holding 0 and 1.
    """
    # NOTE(review): the uint8 cast runs *before* the ``> 0`` test, so a
    # fractional mean is truncated to 0 and a pixel survives only when the
    # mean reaches 1. Confirm this is intended rather than thresholding first.
    masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy().astype(np.uint8)
    masks = (masks > 0).astype(np.uint8)

    if polygon_refinement:
        for idx, mask in enumerate(masks):
            polygon = mask_to_polygon(mask)
            if polygon.size == 0:
                # No contour was found; keep the mask as it is instead of
                # handing an empty polygon to fillPoly.
                continue
            masks[idx] = cv2.fillPoly(np.zeros(mask.shape, dtype=np.uint8), [polygon], 1)

    return list(masks)
def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[DetectionResult]:
    """Run zero-shot object detection and wrap the hits as ``DetectionResult``.

    Args:
        image: Picture to search.
        labels: Candidate labels; a trailing ``"."`` is appended to any label
            that does not already end with one.
        threshold: Score threshold forwarded to the detection pipeline.
        detector_id: Model id for the pipeline; defaults to
            ``"IDEA-Research/grounding-dino-base"``.

    Returns:
        One ``DetectionResult`` per pipeline result (masks not yet set).
    """
    detector_id = detector_id or "IDEA-Research/grounding-dino-base"
    # Use the GPU when there is one, but stay runnable on CPU-only machines.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device=device)

    labels = [label if label.endswith(".") else label + "." for label in labels]
    results = object_detector(image, candidate_labels=labels, threshold=threshold)
    return [DetectionResult.from_dict(result) for result in results]
def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
    """Attach a segmentation mask to every detection, prompted by its box.

    Args:
        image: Picture the detections refer to.
        detection_results: Detections whose boxes are used as prompts; their
            ``mask`` attribute is set in place.
        polygon_refinement: Forwarded to ``refine_masks``.
        segmenter_id: Model id for the mask generator; defaults to
            ``"martintmv/InsectSAM"``.

    Returns:
        The same ``detection_results`` list, with masks assigned.
    """
    if not detection_results:
        # Nothing to prompt the segmenter with; skip loading the model and
        # avoid passing an empty box list to the processor.
        return detection_results

    segmenter_id = segmenter_id or "martintmv/InsectSAM"
    # Use the GPU when there is one, but stay runnable on CPU-only machines.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(device)
    processor = AutoProcessor.from_pretrained(segmenter_id)

    boxes = get_boxes(detection_results)
    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to(device)

    # Pure inference: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        outputs = segmentator(**inputs)

    masks = processor.post_process_masks(
        masks=outputs.pred_masks,
        original_sizes=inputs.original_sizes,
        reshaped_input_sizes=inputs.reshaped_input_sizes,
    )[0]
    masks = refine_masks(masks, polygon_refinement)

    for detection_result, mask in zip(detection_results, masks):
        detection_result.mask = mask
    return detection_results
def grounded_segmentation(image: Union[Image.Image, str], labels: List[str], threshold: float = 0.3, polygon_refinement: bool = False, detector_id: Optional[str] = None, segmenter_id: Optional[str] = None) -> Tuple[np.ndarray, List[DetectionResult]]:
    """Load an image, detect the labelled objects and segment each of them.

    Chains ``load_image`` -> ``detect`` -> ``segment``.

    Returns:
        A pair ``(image_array, detections)``: the loaded image as an array and
        the detections returned by ``segment``.
    """
    pil_image = load_image(image)
    found = detect(pil_image, labels, threshold, detector_id)
    segmented = segment(pil_image, found, polygon_refinement, segmenter_id)
    return np.array(pil_image), segmented
def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
    """Return ``(xmin, ymin, xmax, ymax)`` of the non-zero pixels of ``mask``.

    Both maxima are inclusive indices. A mask without any non-zero pixel
    makes the underlying ``min``/``max`` reductions raise ``ValueError``.
    """
    rows, cols = np.nonzero(mask)
    left, right = cols.min(), cols.max()
    top, bottom = rows.min(), rows.max()
    return left, top, right, bottom
def extract_and_paste_insect(original_image: np.ndarray, detection: DetectionResult, background: np.ndarray) -> None:
    """Copy the masked pixels of one detection onto ``background`` in place.

    Every pixel where ``detection.mask`` is non-zero is copied from
    ``original_image`` to the same coordinates in ``background``; all other
    background pixels are left untouched.

    This replaces a crop/``bitwise_not``/``add`` sequence that had three
    problems with 0/1 masks: the inverted mask (254/255) was non-zero
    everywhere, so the background was never cleared and the insect was
    saturating-added on top of it; the crop was taken from the mask bounds
    but pasted at the box corner; and the exclusive slice end dropped the
    last row and column.

    Args:
        original_image: Source picture, indexed by the 2-D mask on its first
            two axes.
        detection: Object providing ``mask``; ``None`` or an all-zero mask
            makes this a no-op.
        background: Target picture of the same height and width as
            ``original_image``; modified in place.
    """
    mask = detection.mask
    if mask is None:
        return

    insect_pixels = np.asarray(mask).astype(bool)
    if not insect_pixels.any():
        # Empty mask: nothing to paste.
        return

    background[insect_pixels] = original_image[insect_pixels]
def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
    """Return a yellow canvas of the image's size with the masked insects pasted in.

    Args:
        image: Source picture in RGB channel order; only its height and width
            are used for the canvas, its pixels for the pasted insects.
        detections: Detections to paste; those without a mask are skipped.

    Returns:
        A new ``uint8`` array of shape ``(height, width, 3)``.
    """
    height, width = image.shape[0], image.shape[1]
    # The arrays here are RGB (they come from a PIL image and are displayed
    # as-is), so yellow is (255, 255, 0); (0, 255, 255) would render as cyan.
    yellow_background = np.full((height, width, 3), (255, 255, 0), dtype=np.uint8)

    for detection in detections:
        if detection.mask is not None:
            extract_and_paste_insect(image, detection, yellow_background)
    return yellow_background
def draw_classification_boxes(image_with_insects, detections):
    """Draw a box and a filled caption tag for every detection, in place.

    For each detection a randomly coloured rectangle is drawn around its box,
    a filled rectangle of the same colour is placed directly above the box's
    top-left corner, and the ``"label: score"`` text is written in white on it.

    Returns:
        The same ``image_with_insects`` object that was passed in.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for det in detections:
        bbox = det.box
        caption = f"{det.label}: {det.score:.2f}"
        colour = np.random.randint(0, 256, size=3).tolist()

        cv2.rectangle(image_with_insects, (bbox.xmin, bbox.ymin), (bbox.xmax, bbox.ymax), colour, 2)

        # Size the tag to the rendered caption, including its baseline.
        (text_w, text_h), base = cv2.getTextSize(caption, font, 0.5, 2)
        tag_top_left = (bbox.xmin, bbox.ymin - text_h - base)
        tag_bottom_right = (bbox.xmin + text_w, bbox.ymin)
        cv2.rectangle(image_with_insects, tag_top_left, tag_bottom_right, colour, thickness=cv2.FILLED)

        cv2.putText(image_with_insects, caption, (bbox.xmin, bbox.ymin - base), font, 0.5, (255, 255, 255), 2)
    return image_with_insects
def plot_detections_plotly(image: np.ndarray, detections: List[DetectionResult]) -> str:
    """Render the detections as an interactive Plotly figure saved to HTML.

    For every detection the largest mask contour is drawn as a filled, closed
    outline (skipped when there is no mask or no contour), together with the
    bounding box and a caption at the box centre.

    Args:
        image: Picture shown as the figure background.
        detections: Detections to overlay.

    Returns:
        Path of the written HTML file (``/tmp/plotly_image.html``).
    """
    from plotly import graph_objects as go
    import plotly.express as px

    fig = px.imshow(image)
    class_colors = {i: f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})' for i in range(len(detections))}

    for idx, detection in enumerate(detections):
        box = detection.box
        caption = f"{detection.label}: {detection.score:.2f}"

        if detection.mask is not None:
            polygon = np.asarray(mask_to_polygon(detection.mask))
            if polygon.size:
                # OpenCV contours have shape (N, 1, 2); flatten to (N, 2) so
                # each row is one (x, y) point, then repeat the first point
                # to close the outline.
                points = polygon.reshape(-1, 2)
                closed = np.vstack([points, points[:1]])
                fig.add_trace(go.Scatter(
                    x=closed[:, 0].tolist(),
                    y=closed[:, 1].tolist(),
                    mode='lines',
                    line=dict(color=class_colors[idx], width=2),
                    fill='toself',
                    name=caption
                ))

        xmin, ymin, xmax, ymax = box.xyxy
        fig.add_shape(
            type="rect",
            x0=xmin, y0=ymin, x1=xmax, y1=ymax,
            line=dict(color=class_colors[idx])
        )
        fig.add_annotation(
            x=(xmin + xmax) // 2, y=(ymin + ymax) // 2,
            text=caption,
        )

    fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False))
    file_path = "/tmp/plotly_image.html"
    fig.write_html(file_path)
    return file_path
# On a ZeroGPU Space a GPU is only attached while a ``@spaces.GPU`` function
# runs; the detector and segmenter called below both move their work to CUDA.
@spaces.GPU
def process_image(image):
    """Gradio callback: detect and segment insects, then build the three outputs.

    Args:
        image: Input picture from the Gradio image component.

    Returns:
        A tuple ``(annotated_image, yellow_background_with_boxes,
        plotly_image_path)``: the input with detections drawn on it, the
        insects pasted on a plain background with labelled boxes, and the
        path of the Plotly HTML file.
    """
    labels = ["insect"]
    original_image, detections = grounded_segmentation(image, labels, threshold=0.3, polygon_refinement=True)
    annotated_image = plot_detections(original_image, detections)
    # ``original_image`` is already an array; no extra copy is needed here.
    yellow_background_with_insects = create_yellow_background_with_insects(original_image, detections)
    yellow_background_with_boxes = draw_classification_boxes(yellow_background_with_insects.copy(), detections)
    # NOTE(review): this is a file path, while the third output component is
    # gr.HTML -- confirm the component is meant to receive a path.
    plotly_image_path = plot_detections_plotly(original_image, detections)
    return annotated_image, yellow_background_with_boxes, plotly_image_path
# Build the web UI: one image in; two annotated images and one HTML pane out.
demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="numpy"),
        gr.Image(type="numpy"),
        gr.HTML(),
    ],
    title="π InsectSAM + GroundingDINO Inference",
)
demo.launch()