Spaces:
Running
Running
from typing import Dict, List, Union | |
import cv2 | |
import numpy as np | |
import PIL | |
import supervision as sv | |
from PIL import Image | |
from smolagents import AgentImage, Tool | |
def get_class_ids_from_labels(labels: List[str]):
    """Map every label to an integer class id.

    Ids are assigned in order of first appearance: the first distinct label
    gets 0, the next distinct label gets 1, and so on. Equal labels always
    share the same id.

    Args:
        labels: Class names, one per detection (duplicates allowed).

    Returns:
        A list of ints with the same length and order as ``labels``.
    """
    # dict.fromkeys de-duplicates while keeping insertion order. A plain set
    # would make the id assignment depend on string hashing, which changes
    # between interpreter runs and therefore shuffles ids (and colors).
    label_to_id = {label: idx for idx, label in enumerate(dict.fromkeys(labels))}
    return [label_to_id[label] for label in labels]
def create_detections_from_image_segmentation_output(
    image_segmentation_output: List[Dict[str, Union[str, Dict[str, float], List]]],
):
    """Build a ``supervision.Detections`` object from segmentation results.

    Each input entry must provide a ``"mask"`` (anything ``np.array`` can turn
    into a 2-D array; pixels > 0 belong to the object) and a ``"label"``.
    NOTE(review): presumably this is the output of an image-segmentation
    pipeline - confirm against the caller.

    Entries whose mask has no positive pixel are dropped entirely, so the
    boxes, masks and class ids handed to ``sv.Detections`` always have the
    same length.

    Args:
        image_segmentation_output: One dict per segmented object.

    Returns:
        ``sv.Detections`` with ``xyxy`` (tight box around each mask, using
        inclusive pixel indices), boolean ``mask`` and ``class_id``. An empty
        ``sv.Detections`` is returned when no entry has a non-empty mask.
    """
    # Ids are computed over *all* labels so that dropping an empty mask does
    # not change the id assigned to the remaining labels.
    all_labels = [detection["label"] for detection in image_segmentation_output]
    all_class_ids = get_class_ids_from_labels(all_labels)

    xyxy = []
    masks = []
    class_ids = []
    for detection, class_id in zip(image_segmentation_output, all_class_ids):
        # supervision documents masks as boolean arrays; binarize here so
        # e.g. 0/255 masks are not misread as integer indices downstream.
        mask_array = np.array(detection["mask"]) > 0
        y_indices, x_indices = np.nonzero(mask_array)
        if y_indices.size == 0:
            # No foreground pixel -> no bounding box; skip the whole entry
            # instead of only skipping its box.
            continue
        xyxy.append(
            (x_indices.min(), y_indices.min(), x_indices.max(), y_indices.max())
        )
        masks.append(mask_array)
        class_ids.append(class_id)

    if not xyxy:
        # np.array([]) would have shape (0,), not the (0, 4) that Detections
        # expects, so use the library's own empty constructor.
        return sv.Detections.empty()

    return sv.Detections(
        xyxy=np.array(xyxy),
        mask=np.array(masks),
        class_id=np.array(class_ids),
    )
class MaskAnnotatorTool(Tool):
    """Agent tool that draws segmentation masks on top of an image.

    The class attributes below (``name``, ``description``, ``inputs``,
    ``output_type``) declare the tool's metadata; ``forward`` does the
    actual drawing with ``supervision.MaskAnnotator``.
    """

    name = "mask_annotator"
    description = """
Given an image and a list of detections, draw the masks on the image.
The image is a PIL image.
The detections are an object of type supervision.Detections. You can use the task_inference_output_converter tool to obtain the proper format for the detections.
The output is the image with the masks drawn on it.
This tool is heavily unoptimized for large images, so it is recommended to resize the image to a smaller size before using this tool.
"""
    inputs = {
        "image": {
            "type": "image",
            "description": "The image to annotate the masks on. Resize the image to a smaller size before using this tool.",
        },
        "detections": {
            "type": "object",
            "description": """
The detections to annotate on the image.
The detections are an object of type supervision.Detections.
You can use the task_inference_output_converter tool to obtain the proper format for the detections.
""",
        },
    }
    output_type = "image"

    def __init__(self):
        super().__init__()

    def forward(
        self,
        image: AgentImage,
        detections: sv.Detections,
    ):
        """Return ``image`` annotated with the masks held in ``detections``.

        Args:
            image: Scene to draw on.
            detections: ``supervision.Detections`` whose masks are drawn.

        Returns:
            Whatever ``sv.MaskAnnotator.annotate`` returns for this scene.
        """
        # A fresh annotator with default settings is created for every call.
        return sv.MaskAnnotator().annotate(scene=image, detections=detections)