|
from typing import Any, List, Optional, Tuple, Union

import numpy as np

from inference.core.entities.responses.inference import (
    InferenceResponseImage,
    InstanceSegmentationInferenceResponse,
    InstanceSegmentationPrediction,
    Point,
)
from inference.core.exceptions import InvalidMaskDecodeArgument
from inference.core.models.roboflow import OnnxRoboflowInferenceModel
from inference.core.models.types import PreprocessReturnMetadata
from inference.core.models.utils.validate import (
    get_num_classes_from_model_prediction_shape,
)
from inference.core.nms import w_np_non_max_suppression
from inference.core.utils.postprocess import (
    masks2poly,
    post_process_bboxes,
    post_process_polygons,
    process_mask_accurate,
    process_mask_fast,
    process_mask_tradeoff,
)
|
|
|
DEFAULT_CONFIDENCE = 0.4 |
|
DEFAULT_IOU_THRESH = 0.3 |
|
DEFAULT_CLASS_AGNOSTIC_NMS = False |
|
DEFAUlT_MAX_DETECTIONS = 300 |
|
DEFAULT_MAX_CANDIDATES = 3000 |
|
DEFAULT_MASK_DECODE_MODE = "accurate" |
|
DEFAULT_TRADEOFF_FACTOR = 0.0 |
|
|
|
PREDICTIONS_TYPE = List[List[List[float]]] |
|
|
|
|
|
class InstanceSegmentationBaseOnnxRoboflowInferenceModel(OnnxRoboflowInferenceModel): |
|
"""Roboflow ONNX Instance Segmentation model. |
|
|
|
This class implements an instance segmentation specific inference method |
|
for ONNX models provided by Roboflow. |
|
""" |
|
|
|
task_type = "instance-segmentation" |
|
num_masks = 32 |
|
|
|
def infer( |
|
self, |
|
image: Any, |
|
class_agnostic_nms: bool = False, |
|
confidence: float = DEFAULT_CONFIDENCE, |
|
disable_preproc_auto_orient: bool = False, |
|
disable_preproc_contrast: bool = False, |
|
disable_preproc_grayscale: bool = False, |
|
disable_preproc_static_crop: bool = False, |
|
iou_threshold: float = DEFAULT_IOU_THRESH, |
|
mask_decode_mode: str = DEFAULT_MASK_DECODE_MODE, |
|
max_candidates: int = DEFAULT_MAX_CANDIDATES, |
|
max_detections: int = DEFAUlT_MAX_DETECTIONS, |
|
return_image_dims: bool = False, |
|
tradeoff_factor: float = DEFAULT_TRADEOFF_FACTOR, |
|
**kwargs, |
|
) -> Union[PREDICTIONS_TYPE, Tuple[PREDICTIONS_TYPE, List[Tuple[int, int]]]]: |
|
""" |
|
Process an image or list of images for instance segmentation. |
|
|
|
Args: |
|
image (Any): An image or a list of images for processing. |
|
class_agnostic_nms (bool, optional): Whether to use class-agnostic non-maximum suppression. Defaults to False. |
|
confidence (float, optional): Confidence threshold for predictions. Defaults to 0.5. |
|
iou_threshold (float, optional): IoU threshold for non-maximum suppression. Defaults to 0.5. |
|
mask_decode_mode (str, optional): Decoding mode for masks. Choices are "accurate", "tradeoff", and "fast". Defaults to "accurate". |
|
max_candidates (int, optional): Maximum number of candidate detections. Defaults to 3000. |
|
max_detections (int, optional): Maximum number of detections after non-maximum suppression. Defaults to 300. |
|
return_image_dims (bool, optional): Whether to return the dimensions of the processed images. Defaults to False. |
|
tradeoff_factor (float, optional): Tradeoff factor used when `mask_decode_mode` is set to "tradeoff". Must be in [0.0, 1.0]. Defaults to 0.5. |
|
disable_preproc_auto_orient (bool, optional): If true, the auto orient preprocessing step is disabled for this call. Default is False. |
|
disable_preproc_contrast (bool, optional): If true, the auto contrast preprocessing step is disabled for this call. Default is False. |
|
disable_preproc_grayscale (bool, optional): If true, the grayscale preprocessing step is disabled for this call. Default is False. |
|
disable_preproc_static_crop (bool, optional): If true, the static crop preprocessing step is disabled for this call. Default is False. |
|
**kwargs: Additional parameters to customize the inference process. |
|
|
|
Returns: |
|
Union[List[List[List[float]]], Tuple[List[List[List[float]]], List[Tuple[int, int]]]]: The list of predictions, with each prediction being a list of lists. Optionally, also returns the dimensions of the processed images. |
|
|
|
Raises: |
|
InvalidMaskDecodeArgument: If an invalid `mask_decode_mode` is provided or if the `tradeoff_factor` is outside the allowed range. |
|
|
|
Notes: |
|
- Processes input images and normalizes them. |
|
- Makes predictions using the ONNX runtime. |
|
- Applies non-maximum suppression to the predictions. |
|
- Decodes the masks according to the specified mode. |
|
""" |
|
return super().infer( |
|
image, |
|
class_agnostic_nms=class_agnostic_nms, |
|
confidence=confidence, |
|
disable_preproc_auto_orient=disable_preproc_auto_orient, |
|
disable_preproc_contrast=disable_preproc_contrast, |
|
disable_preproc_grayscale=disable_preproc_grayscale, |
|
disable_preproc_static_crop=disable_preproc_static_crop, |
|
iou_threshold=iou_threshold, |
|
mask_decode_mode=mask_decode_mode, |
|
max_candidates=max_candidates, |
|
max_detections=max_detections, |
|
return_image_dims=return_image_dims, |
|
tradeoff_factor=tradeoff_factor, |
|
) |
|
|
|
def postprocess( |
|
self, |
|
predictions: Tuple[np.ndarray, np.ndarray], |
|
preprocess_return_metadata: PreprocessReturnMetadata, |
|
**kwargs, |
|
) -> Union[ |
|
InstanceSegmentationInferenceResponse, |
|
List[InstanceSegmentationInferenceResponse], |
|
]: |
|
predictions, protos = predictions |
|
predictions = w_np_non_max_suppression( |
|
predictions, |
|
conf_thresh=kwargs["confidence"], |
|
iou_thresh=kwargs["iou_threshold"], |
|
class_agnostic=kwargs["class_agnostic_nms"], |
|
max_detections=kwargs["max_detections"], |
|
max_candidate_detections=kwargs["max_candidates"], |
|
num_masks=self.num_masks, |
|
) |
|
infer_shape = (self.img_size_h, self.img_size_w) |
|
predictions = np.array(predictions) |
|
masks = [] |
|
mask_decode_mode = kwargs["mask_decode_mode"] |
|
tradeoff_factor = kwargs["tradeoff_factor"] |
|
img_in_shape = preprocess_return_metadata["im_shape"] |
|
if predictions.shape[1] > 0: |
|
for i, (pred, proto, img_dim) in enumerate( |
|
zip(predictions, protos, preprocess_return_metadata["img_dims"]) |
|
): |
|
if mask_decode_mode == "accurate": |
|
batch_masks = process_mask_accurate( |
|
proto, pred[:, 7:], pred[:, :4], img_in_shape[2:] |
|
) |
|
output_mask_shape = img_in_shape[2:] |
|
elif mask_decode_mode == "tradeoff": |
|
if not 0 <= tradeoff_factor <= 1: |
|
raise InvalidMaskDecodeArgument( |
|
f"Invalid tradeoff_factor: {tradeoff_factor}. Must be in [0.0, 1.0]" |
|
) |
|
batch_masks = process_mask_tradeoff( |
|
proto, |
|
pred[:, 7:], |
|
pred[:, :4], |
|
img_in_shape[2:], |
|
tradeoff_factor, |
|
) |
|
output_mask_shape = batch_masks.shape[1:] |
|
elif mask_decode_mode == "fast": |
|
batch_masks = process_mask_fast( |
|
proto, pred[:, 7:], pred[:, :4], img_in_shape[2:] |
|
) |
|
output_mask_shape = batch_masks.shape[1:] |
|
else: |
|
raise InvalidMaskDecodeArgument( |
|
f"Invalid mask_decode_mode: {mask_decode_mode}. Must be one of ['accurate', 'fast', 'tradeoff']" |
|
) |
|
polys = masks2poly(batch_masks) |
|
pred[:, :4] = post_process_bboxes( |
|
[pred[:, :4]], |
|
infer_shape, |
|
[img_dim], |
|
self.preproc, |
|
resize_method=self.resize_method, |
|
disable_preproc_static_crop=preprocess_return_metadata[ |
|
"disable_preproc_static_crop" |
|
], |
|
)[0] |
|
polys = post_process_polygons( |
|
img_dim, |
|
polys, |
|
output_mask_shape, |
|
self.preproc, |
|
resize_method=self.resize_method, |
|
) |
|
masks.append(polys) |
|
else: |
|
masks.extend([[]] * len(predictions)) |
|
return self.make_response( |
|
predictions, masks, preprocess_return_metadata["img_dims"], **kwargs |
|
) |
|
|
|
def preprocess( |
|
self, image: Any, **kwargs |
|
) -> Tuple[np.ndarray, PreprocessReturnMetadata]: |
|
img_in, img_dims = self.load_image( |
|
image, |
|
disable_preproc_auto_orient=kwargs.get("disable_preproc_auto_orient"), |
|
disable_preproc_contrast=kwargs.get("disable_preproc_contrast"), |
|
disable_preproc_grayscale=kwargs.get("disable_preproc_grayscale"), |
|
disable_preproc_static_crop=kwargs.get("disable_preproc_static_crop"), |
|
) |
|
|
|
img_in /= 255.0 |
|
return img_in, PreprocessReturnMetadata( |
|
{ |
|
"img_dims": img_dims, |
|
"im_shape": img_in.shape, |
|
"disable_preproc_static_crop": kwargs.get( |
|
"disable_preproc_static_crop" |
|
), |
|
} |
|
) |
|
|
|
def make_response( |
|
self, |
|
predictions: List[List[List[float]]], |
|
masks: List[List[List[float]]], |
|
img_dims: List[Tuple[int, int]], |
|
class_filter: List[str] = [], |
|
**kwargs, |
|
) -> Union[ |
|
InstanceSegmentationInferenceResponse, |
|
List[InstanceSegmentationInferenceResponse], |
|
]: |
|
""" |
|
Create instance segmentation inference response objects for the provided predictions and masks. |
|
|
|
Args: |
|
predictions (List[List[List[float]]]): List of prediction data, one for each image. |
|
masks (List[List[List[float]]]): List of masks corresponding to the predictions. |
|
img_dims (List[Tuple[int, int]]): List of image dimensions corresponding to the processed images. |
|
class_filter (List[str], optional): List of class names to filter predictions by. Defaults to an empty list (no filtering). |
|
|
|
Returns: |
|
Union[InstanceSegmentationInferenceResponse, List[InstanceSegmentationInferenceResponse]]: A single instance segmentation response or a list of instance segmentation responses based on the number of processed images. |
|
|
|
Notes: |
|
- For each image, constructs an `InstanceSegmentationInferenceResponse` object. |
|
- Each response contains a list of `InstanceSegmentationPrediction` objects. |
|
""" |
|
responses = [ |
|
InstanceSegmentationInferenceResponse( |
|
predictions=[ |
|
InstanceSegmentationPrediction( |
|
|
|
**{ |
|
"x": (pred[0] + pred[2]) / 2, |
|
"y": (pred[1] + pred[3]) / 2, |
|
"width": pred[2] - pred[0], |
|
"height": pred[3] - pred[1], |
|
"points": [Point(x=point[0], y=point[1]) for point in mask], |
|
"confidence": pred[4], |
|
"class": self.class_names[int(pred[6])], |
|
"class_id": int(pred[6]), |
|
} |
|
) |
|
for pred, mask in zip(batch_predictions, batch_masks) |
|
if not class_filter |
|
or self.class_names[int(pred[6])] in class_filter |
|
], |
|
image=InferenceResponseImage( |
|
width=img_dims[ind][1], height=img_dims[ind][0] |
|
), |
|
) |
|
for ind, (batch_predictions, batch_masks) in enumerate( |
|
zip(predictions, masks) |
|
) |
|
] |
|
return responses |
|
|
|
def predict(self, img_in: np.ndarray, **kwargs) -> Tuple[np.ndarray, np.ndarray]: |
|
"""Runs inference on the ONNX model. |
|
|
|
Args: |
|
img_in (np.ndarray): The preprocessed image(s) to run inference on. |
|
|
|
Returns: |
|
Tuple[np.ndarray, np.ndarray]: The ONNX model predictions and the ONNX model protos. |
|
|
|
Raises: |
|
NotImplementedError: This method must be implemented by a subclass. |
|
""" |
|
raise NotImplementedError("predict must be implemented by a subclass") |
|
|
|
def validate_model_classes(self) -> None: |
|
output_shape = self.get_model_output_shape() |
|
num_classes = get_num_classes_from_model_prediction_shape( |
|
output_shape[2], masks=self.num_masks |
|
) |
|
try: |
|
assert num_classes == self.num_classes |
|
except AssertionError: |
|
raise ValueError( |
|
f"Number of classes in model ({num_classes}) does not match the number of classes in the environment ({self.num_classes})" |
|
) |
|
|