Spaces:

martintomov
/

InsectSAM

Running on Zero

InsectSAM / app.py

Martin Tomov

Update app.py

6e9745a verified about 1 year ago

10.1 kB

	import os
	# Install the pinned Gradio release at start-up, before `import gradio` runs
	# further down. The command's exit status is not checked.
	os.system('pip install gradio==4.29.0') # as gradio==4.29.0 doesn't work in requirements.txt

	import random
	from dataclasses import dataclass
	from typing import Any, List, Dict, Optional, Union, Tuple
	import cv2
	import torch
	import requests
	import numpy as np
	from PIL import Image
	import matplotlib.pyplot as plt
	from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
	import gradio as gr
	import spaces

	@dataclass
	class BoundingBox:
	    """Rectangle described by its minimum and maximum x/y coordinates."""

	    xmin: int
	    ymin: int
	    xmax: int
	    ymax: int

	    @property
	    def xyxy(self) -> List[float]:
	        """Return the coordinates as a new ``[xmin, ymin, xmax, ymax]`` list."""
	        corners = (self.xmin, self.ymin, self.xmax, self.ymax)
	        return list(corners)

	@dataclass
	class DetectionResult:
	    """A single detection: score, label, bounding box and an optional mask."""

	    score: float
	    label: str
	    box: BoundingBox
	    # Remains None until a mask is assigned to the detection.
	    mask: Optional[np.ndarray] = None

	    @classmethod
	    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
	        """Build a result from a dict with 'score', 'label' and 'box' entries.

	        The 'box' entry must be a mapping providing 'xmin', 'ymin', 'xmax'
	        and 'ymax'. The mask is left at its default of None.

	        Raises:
	            KeyError: If any of the required keys is missing.
	        """
	        score = detection_dict['score']
	        label = detection_dict['label']
	        raw_box = detection_dict['box']
	        bounding_box = BoundingBox(
	            xmin=raw_box['xmin'],
	            ymin=raw_box['ymin'],
	            xmax=raw_box['xmax'],
	            ymax=raw_box['ymax'],
	        )
	        return cls(score=score, label=label, box=bounding_box)

	def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
	    """Draw every detection's box, caption and mask outline on the image.

	    Drawing happens on the array produced by the initial RGB->BGR conversion,
	    and the result is converted back to RGB before it is returned. Each
	    detection gets its own randomly drawn colour.

	    Args:
	        image: PIL image or array to annotate.
	        detection_results: Detections to draw; a detection whose ``mask`` is
	            None gets a box and caption only.

	    Returns:
	        The annotated image as an array.
	    """
	    pixels = np.array(image) if isinstance(image, Image.Image) else image
	    canvas = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

	    for det in detection_results:
	        rect = det.box
	        colour = np.random.randint(0, 256, size=3).tolist()
	        caption = f'{det.label}: {det.score:.2f}'

	        cv2.rectangle(canvas, (rect.xmin, rect.ymin), (rect.xmax, rect.ymax), colour, 2)
	        # The caption baseline sits 10 pixels above the top edge of the box.
	        cv2.putText(canvas, caption, (rect.xmin, rect.ymin - 10),
	                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, colour, 2)

	        if det.mask is None:
	            continue

	        # Scale the mask by 255 before tracing its outer contours.
	        scaled_mask = (det.mask * 255).astype(np.uint8)
	        outlines, _ = cv2.findContours(scaled_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        cv2.drawContours(canvas, outlines, -1, colour, 2)

	    return cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)

	def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
	    """Return ``annotate(image, detections)``; this is a thin wrapper around it."""
	    return annotate(image, detections)

	def load_image(image: Union[str, Image.Image], timeout: float = 30.0) -> Image.Image:
	    """Return *image* as an RGB PIL image.

	    Args:
	        image: A string starting with "http" (downloaded), any other string
	            (opened as a local path), or an already loaded PIL image.
	        timeout: Seconds to wait for the HTTP server before giving up. Only
	            used when *image* is a URL.

	    Returns:
	        The image converted to RGB mode.

	    Raises:
	        requests.RequestException: If the download times out, fails, or the
	            server answers with an error status.
	    """
	    if not isinstance(image, str):
	        return image.convert("RGB")

	    if image.startswith("http"):
	        from io import BytesIO

	        # A timeout keeps a stalled server from hanging the request forever,
	        # and the status check stops an error page from being fed to PIL.
	        response = requests.get(image, timeout=timeout)
	        response.raise_for_status()
	        # `content` is the body after requests has undone any transfer
	        # encoding, unlike the raw socket stream.
	        return Image.open(BytesIO(response.content)).convert("RGB")

	    return Image.open(image).convert("RGB")

	def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
	    """Collect each detection's ``box.xyxy`` and wrap the list in one outer list.

	    The result has the form ``[[box_0, box_1, ...]]``: a single outer entry
	    holding all boxes, in the order of *detection_results*.
	    """
	    return [[result.box.xyxy for result in detection_results]]

	def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
	    """Return the external contour of *mask* that encloses the largest area.

	    Contours are found with ``cv2.findContours`` (external only, simple
	    chain approximation) and returned exactly as OpenCV produced them.

	    Returns:
	        The largest contour, or an empty array when no contour is found.
	    """
	    found, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    if not found:
	        return np.array([])
	    return max(found, key=cv2.contourArea)

	def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
	    """Collapse a 4-D mask tensor into a list of binary uint8 masks.

	    Axis 1 of *masks* is averaged away, leaving one 2-D mask per entry of
	    axis 0. The average is truncated to uint8 before thresholding, so for
	    boolean input a pixel is set only when it is set along all of axis 1.

	    Args:
	        masks: 4-D tensor of masks; axes 2 and 3 are kept as the mask plane.
	        polygon_refinement: When true, replace each mask by the filled
	            polygon of its largest external contour.

	    Returns:
	        One uint8 array with values 0 or 1 per entry of axis 0.
	    """
	    averaged = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1)
	    # The integer cast drops fractional averages to 0 before the comparison.
	    binary = (averaged.numpy().astype(np.uint8) > 0).astype(np.uint8)

	    if polygon_refinement:
	        for idx, mask in enumerate(binary):
	            polygon = mask_to_polygon(mask)
	            if polygon.size == 0:
	                # No contour was found. fillPoly rejects an empty point
	                # array, so the mask is left as it is.
	                continue
	            binary[idx] = cv2.fillPoly(np.zeros(mask.shape, dtype=np.uint8), [polygon], 1)

	    return list(binary)

	@spaces.GPU
	def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[DetectionResult]:
	    """Run zero-shot object detection on *image* for the given labels.

	    A new ``transformers`` pipeline is created on the "cuda" device each time
	    the function is called.

	    Args:
	        image: Image to search.
	        labels: Candidate label texts; a trailing "." is appended to any
	            label that does not already end with one.
	        threshold: Passed to the pipeline as its ``threshold`` argument.
	        detector_id: Model to load; "IDEA-Research/grounding-dino-base" is
	            used when this is None or empty.

	    Returns:
	        One ``DetectionResult`` per entry returned by the pipeline, with
	        ``mask`` left unset.
	    """
	    detector_id = detector_id or "IDEA-Research/grounding-dino-base"
	    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device="cuda")

	    dotted_labels = [label if label.endswith(".") else label + "." for label in labels]
	    raw_results = object_detector(image, candidate_labels=dotted_labels, threshold=threshold)
	    return [DetectionResult.from_dict(raw) for raw in raw_results]

	@spaces.GPU
	def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
	    """Attach a segmentation mask to every detection, prompting with its box.

	    The model and processor are loaded on each call and run on "cuda".

	    Args:
	        image: Image to segment.
	        detection_results: Detections whose boxes are used as prompts. The
	            ``mask`` attribute of each one is overwritten in place.
	        polygon_refinement: Forwarded to ``refine_masks``.
	        segmenter_id: Checkpoint to load; "martintmv/InsectSAM" is used when
	            this is None or empty.

	    Returns:
	        The same ``detection_results`` list that was passed in.
	    """
	    if not detection_results:
	        # Without boxes there is nothing to prompt the model with, so skip
	        # loading it instead of sending an empty box list to the processor.
	        return detection_results

	    segmenter_id = segmenter_id or "martintmv/InsectSAM"
	    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to("cuda")
	    processor = AutoProcessor.from_pretrained(segmenter_id)

	    boxes = get_boxes(detection_results)
	    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to("cuda")
	    # Inference only: disabling autograd avoids keeping activations alive
	    # for a backward pass that never happens.
	    with torch.no_grad():
	        outputs = segmentator(**inputs)

	    # Index 0 selects the masks belonging to the single input image.
	    masks = processor.post_process_masks(
	        masks=outputs.pred_masks,
	        original_sizes=inputs.original_sizes,
	        reshaped_input_sizes=inputs.reshaped_input_sizes,
	    )[0]
	    masks = refine_masks(masks, polygon_refinement)

	    for detection_result, mask in zip(detection_results, masks):
	        detection_result.mask = mask
	    return detection_results

	def grounded_segmentation(image: Union[Image.Image, str], labels: List[str], threshold: float = 0.3, polygon_refinement: bool = False, detector_id: Optional[str] = None, segmenter_id: Optional[str] = None) -> Tuple[np.ndarray, List[DetectionResult]]:
	    """Load an image, detect the labelled objects, then segment each of them.

	    Args:
	        image: Anything accepted by ``load_image``.
	        labels: Label texts handed to ``detect``.
	        threshold: Detection threshold handed to ``detect``.
	        polygon_refinement: Handed to ``segment``.
	        detector_id: Optional detector model id for ``detect``.
	        segmenter_id: Optional segmenter model id for ``segment``.

	    Returns:
	        The loaded image as an array, and the detections returned by
	        ``segment``.
	    """
	    rgb_image = load_image(image)
	    found = detect(rgb_image, labels, threshold, detector_id)
	    segmented = segment(rgb_image, found, polygon_refinement, segmenter_id)
	    return np.array(rgb_image), segmented

	def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
	    """Return ``(xmin, ymin, xmax, ymax)`` of the non-zero pixels of *mask*.

	    Both maxima are inclusive indices. A mask without any non-zero pixel
	    makes the min/max reductions raise ``ValueError``.
	    """
	    rows, cols = np.nonzero(mask)
	    left, right = cols.min(), cols.max()
	    top, bottom = rows.min(), rows.max()
	    return left, top, right, bottom

	def extract_and_paste_insect(original_image: np.ndarray, detection: DetectionResult, background: np.ndarray) -> None:
	    """Copy the masked pixels of one detection from the image onto *background*.

	    Pixels are written at the same coordinates they have in *original_image*,
	    and only where the detection's mask is non-zero; every other background
	    pixel is left untouched. *background* is modified in place.

	    Args:
	        original_image: Source image; expected to have the same height and
	            width as *background* and the mask.
	        detection: Detection whose ``mask`` selects the pixels to copy. A
	            missing or all-zero mask results in no change.
	        background: Destination canvas, updated in place.
	    """
	    if detection.mask is None:
	        return

	    # Masks hold 0/1 values, so work with a boolean view instead of OpenCV
	    # bit masks (a bitwise NOT of 0/1 is non-zero everywhere).
	    selected = np.asarray(detection.mask) > 0
	    if not selected.any():
	        return

	    xmin, ymin, xmax, ymax = mask_to_min_max(selected)
	    # The maxima are inclusive indices, hence the +1 on the slice ends.
	    window = (slice(ymin, ymax + 1), slice(xmin, xmax + 1))
	    inside = selected[window]

	    # `background[window]` is a view, so this assignment writes through.
	    background[window][inside] = original_image[window][inside]

	def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
	    """Return a yellow canvas of the image's size with each masked insect pasted on.

	    Args:
	        image: Source image; only its height and width determine the canvas
	            size. ``process_image`` passes RGB data obtained from PIL.
	        detections: Detections to paste; those without a mask are skipped.

	    Returns:
	        A new ``uint8`` array of shape ``(height, width, 3)``.
	    """
	    height, width = image.shape[0], image.shape[1]
	    # Yellow in RGB channel order, matching the RGB images this app passes in
	    # and hands to Gradio ((0, 255, 255) would be yellow only in BGR).
	    yellow_background = np.full((height, width, 3), (255, 255, 0), dtype=np.uint8)

	    for detection in detections:
	        if detection.mask is None:
	            continue
	        extract_and_paste_insect(image, detection, yellow_background)
	    return yellow_background

	def draw_classification_boxes(image_with_insects, detections):
	for detection in detections:
	label = detection.label
	score = detection.score
	box = detection.box
	color = np.random.randint(0, 256, size=3).tolist()

	cv2.rectangle(image_with_insects, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
	(text_width, text_height), baseline = cv2.getTextSize(f"{label}: {score:.2f}", cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
	cv2.rectangle(
	image_with_insects,
	(box.xmin, box.ymin - text_height - baseline),
	(box.xmin + text_width, box.ymin),
	color,
	thickness=cv2.FILLED
	)
	cv2.putText(
	image_with_insects,
	f"{label}: {score:.2f}",
	(box.xmin, box.ymin - baseline),
	cv2.FONT_HERSHEY_SIMPLEX,
	0.5,
	(255, 255, 255),
	2
	)
	return image_with_insects

	def plot_detections_plotly(image: np.ndarray, detections: List[DetectionResult]) -> str:
	    """Write an interactive Plotly view of the detections and return its path.

	    For each detection the figure gets a rectangle for the box and a centred
	    text annotation. When the detection has a mask with at least one contour,
	    a filled outline of the largest contour is added as well.

	    Args:
	        image: Image shown as the figure background.
	        detections: Detections to draw.

	    Returns:
	        Path of the HTML file. It is always "/tmp/plotly_image.html", so each
	        call overwrites the previous output.
	    """
	    from plotly import graph_objects as go
	    import plotly.express as px

	    fig = px.imshow(image)
	    class_colors = {i: f'rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})' for i in range(len(detections))}

	    for idx, detection in enumerate(detections):
	        caption = f"{detection.label}: {detection.score:.2f}"

	        if detection.mask is not None:
	            polygon = mask_to_polygon(detection.mask)
	            if polygon.size:
	                # OpenCV contours come as (points, 1, 2); flatten them to
	                # (points, 2) so x and y can be read column-wise.
	                points = np.asarray(polygon).reshape(-1, 2)
	                xs = points[:, 0].tolist()
	                ys = points[:, 1].tolist()
	                fig.add_trace(go.Scatter(
	                    # Repeat the first point to close the outline.
	                    x=xs + xs[:1],
	                    y=ys + ys[:1],
	                    mode='lines',
	                    line=dict(color=class_colors[idx], width=2),
	                    fill='toself',
	                    name=caption
	                ))

	        xmin, ymin, xmax, ymax = detection.box.xyxy
	        fig.add_shape(
	            type="rect",
	            x0=xmin, y0=ymin, x1=xmax, y1=ymax,
	            line=dict(color=class_colors[idx])
	        )
	        fig.add_annotation(
	            x=(xmin + xmax) // 2, y=(ymin + ymax) // 2,
	            text=caption,
	        )

	    fig.update_layout(xaxis=dict(visible=False), yaxis=dict(visible=False))
	    file_path = "/tmp/plotly_image.html"
	    fig.write_html(file_path)
	    return file_path

	def process_image(image):
	    """Run the pipeline for the label "insect" and build the three outputs.

	    Returns:
	        A tuple of: the annotated image from ``plot_detections``; a copy of
	        the canvas from ``create_yellow_background_with_insects`` with
	        classification boxes drawn on it; and the path returned by
	        ``plot_detections_plotly``.
	    """
	    # NOTE(review): the third value is a file path, while the interface binds
	    # it to a gr.HTML output - confirm it renders as intended.
	    source_rgb, found = grounded_segmentation(image, ["insect"], threshold=0.3, polygon_refinement=True)

	    outlined = plot_detections(source_rgb, found)
	    pasted = create_yellow_background_with_insects(np.array(source_rgb), found)
	    # Boxes are drawn on a copy so the pasted canvas itself stays unmarked.
	    boxed = draw_classification_boxes(pasted.copy(), found)
	    html_path = plot_detections_plotly(source_rgb, found)

	    return outlined, boxed, html_path

	# Build the demo UI (one PIL image in; two images and one HTML output out)
	# and start serving it as soon as the module is executed.
	demo = gr.Interface(
	    fn=process_image,
	    inputs=gr.Image(type="pil"),
	    outputs=[gr.Image(type="numpy"), gr.Image(type="numpy"), gr.HTML()],
	    title="🐞 InsectSAM + GroundingDINO Inference",
	)
	demo.launch()