import os

# Pin the Gradio version at runtime before importing it (Spaces startup).
os.system('pip install gradio==4.29.0')

import random
from dataclasses import dataclass
from typing import Any, List, Dict, Optional, Union, Tuple

import cv2
import torch
import requests
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
import gradio as gr
import spaces
import json
@dataclass
class BoundingBox:
    xmin: int
    ymin: int
    xmax: int
    ymax: int

    @property
    def xyxy(self) -> List[float]:
        return [self.xmin, self.ymin, self.xmax, self.ymax]
@dataclass
class DetectionResult:
    score: float
    label: str
    box: BoundingBox
    mask: Optional[np.ndarray] = None

    @classmethod
    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
        return cls(
            score=detection_dict['score'],
            label=detection_dict['label'],
            box=BoundingBox(
                xmin=detection_dict['box']['xmin'],
                ymin=detection_dict['box']['ymin'],
                xmax=detection_dict['box']['xmax'],
                ymax=detection_dict['box']['ymax']
            )
        )
def mask_to_min_max(mask):
    """Convert mask to min and max coordinates of the bounding box."""
    y, x = np.where(mask)
    xmin, xmax = x.min(), x.max()
    ymin, ymax = y.min(), y.max()
    return xmin, ymin, xmax, ymax
def extract_and_paste_insect(original_image, detection, background):
    """Cut the masked insect out of `original_image` and paste it onto `background` in place."""
    mask = detection.mask
    xmin, ymin, xmax, ymax = mask_to_min_max(mask)
    # Crop the insect and its mask to the mask's bounding box.
    insect_crop = original_image[ymin:ymax, xmin:xmax]
    mask_crop = mask[ymin:ymax, xmin:xmax]
    # Keep only the masked insect pixels.
    insect = cv2.bitwise_and(insect_crop, insect_crop, mask=mask_crop)
    # Paste location comes from the detection box.
    x_offset, y_offset = detection.box.xmin, detection.box.ymin
    x_end, y_end = x_offset + insect.shape[1], y_offset + insect.shape[0]
    # Blank out the insect area in the background region, then combine.
    inverse_mask = cv2.bitwise_not(mask_crop)
    bg_region = background[y_offset:y_end, x_offset:x_end]
    bg_ready = cv2.bitwise_and(bg_region, bg_region, mask=inverse_mask)
    combined = cv2.add(insect, bg_ready)
    background[y_offset:y_end, x_offset:x_end] = combined
def create_yellow_background_with_insects(image, detections):
    # Create a plain yellow background (the array comes from PIL, so channels are RGB).
    yellow_background = np.full_like(image, (255, 255, 0), dtype=np.uint8)
    # Extract and paste each insect onto the background
    for detection in detections:
        if detection.mask is not None:
            extract_and_paste_insect(image, detection, yellow_background)
    return yellow_background
def run_length_encoding(mask):
    """Run-length encode a flattened mask, returning the list of run lengths."""
    pixels = mask.flatten()
    rle = []
    last_val = 0
    count = 0
    for pixel in pixels:
        if pixel == last_val:
            count += 1
        else:
            if count > 0:
                rle.append(count)
            count = 1
            last_val = pixel
    if count > 0:
        rle.append(count)
    return rle
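
# For reference only (a hypothetical helper, not part of the original code):
# a decoder counterpart showing how the RLE stored in the JSON output could be
# turned back into a binary mask. It assumes the runs alternate starting with
# background (zero) pixels and that the caller knows the original mask shape.
def run_length_decoding(rle, shape):
    pixels = np.zeros(int(np.prod(shape)), dtype=np.uint8)
    position = 0
    value = 0  # runs alternate: background (0), foreground, background, ...
    for count in rle:
        if value:
            pixels[position:position + count] = 255
        position += count
        value = 1 - value
    return pixels.reshape(shape)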
def detections_to_json(detections):
    """Convert DetectionResult objects into JSON-serializable dicts (masks stored as RLE)."""
    detections_list = []
    for detection in detections:
        detection_dict = {
            "score": detection.score,
            "label": detection.label,
            "box": {
                "xmin": detection.box.xmin,
                "ymin": detection.box.ymin,
                "xmax": detection.box.xmax,
                "ymax": detection.box.ymax
            },
            "mask": run_length_encoding(detection.mask) if detection.mask is not None else None
        }
        detections_list.append(detection_dict)
    return detections_list
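
# NOTE: `process_image` below calls `grounded_segmentation`, which is not
# defined in this excerpt. The sketch that follows is one plausible
# implementation, assuming GroundingDINO for zero-shot box detection and SAM
# for mask generation. The checkpoint names (`detector_id`, `segmenter_id`)
# and the mask post-processing are assumptions, not the author's original
# code; the Space presumably points the segmenter at its own InsectSAM
# checkpoint.
detector_id = "IDEA-Research/grounding-dino-base"
segmenter_id = "facebook/sam-vit-base"

def detect(image: Image.Image, labels: List[str], threshold: float) -> List[DetectionResult]:
    """Zero-shot object detection from text labels, wrapped as DetectionResult objects."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device=device)
    # GroundingDINO expects each label phrase to end with a period.
    labels = [label if label.endswith(".") else label + "." for label in labels]
    results = object_detector(image, candidate_labels=labels, threshold=threshold)
    return [DetectionResult.from_dict(result) for result in results]

def segment(image: Image.Image, detections: List[DetectionResult]) -> List[DetectionResult]:
    """Generate a binary mask for each detected box with SAM."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to(device)
    processor = AutoProcessor.from_pretrained(segmenter_id)
    boxes = [[detection.box.xyxy for detection in detections]]
    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = segmentator(**inputs)
    masks = processor.post_process_masks(
        masks=outputs.pred_masks,
        original_sizes=inputs.original_sizes,
        reshaped_input_sizes=inputs.reshaped_input_sizes
    )[0]
    # Collapse SAM's multi-mask output into one 0/255 uint8 mask per box.
    masks = (masks.sum(dim=1) > 0).cpu().numpy().astype(np.uint8) * 255
    for detection, mask in zip(detections, masks):
        detection.mask = mask
    return detections

def grounded_segmentation(image, labels, threshold=0.3, polygon_refinement=False):
    """Detect objects described by `labels`, then segment each detection."""
    # `polygon_refinement` is accepted for interface compatibility but not implemented in this sketch.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(np.uint8(image))
    image = image.convert("RGB")
    detections = segment(image, detect(image, labels, threshold))
    return image, detections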
@spaces.GPU  # Request a ZeroGPU slot for the duration of this call.
def process_image(image):
    labels = ["insect"]
    original_image, detections = grounded_segmentation(image, labels, threshold=0.3, polygon_refinement=True)
    yellow_background_with_insects = create_yellow_background_with_insects(np.array(original_image), detections)
    detections_json = detections_to_json(detections)
    # Save the detections to a local JSON file and also return them as a compact string.
    json_output_path = "insect_detections.json"
    with open(json_output_path, 'w') as json_file:
        json.dump(detections_json, json_file, indent=4)
    return yellow_background_with_insects, json.dumps(detections_json, separators=(',', ':'))
gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="numpy"), gr.Textbox()],
    title="InsectSAM + GroundingDINO Inference",
).launch()