from typing import Any, List, Optional, Tuple, Union

import numpy as np

from inference.core.entities.responses.inference import (
    InferenceResponseImage,
    ObjectDetectionInferenceResponse,
    ObjectDetectionPrediction,
)
from inference.core.env import FIX_BATCH_SIZE, MAX_BATCH_SIZE
from inference.core.logger import logger
from inference.core.models.defaults import (
    DEFAULT_CLASS_AGNOSTIC_NMS,
    DEFAULT_CONFIDENCE,
    DEFAULT_IOU_THRESH,
    DEFAULT_MAX_CANDIDATES,
    DEFAUlT_MAX_DETECTIONS,
)
from inference.core.models.roboflow import OnnxRoboflowInferenceModel
from inference.core.models.types import PreprocessReturnMetadata
from inference.core.models.utils.validate import (
    get_num_classes_from_model_prediction_shape,
)
from inference.core.nms import w_np_non_max_suppression
from inference.core.utils.postprocess import post_process_bboxes


class ObjectDetectionBaseOnnxRoboflowInferenceModel(OnnxRoboflowInferenceModel):
    """Roboflow ONNX object detection model. This class implements an object detection specific infer method."""

    task_type = "object-detection"
    box_format = "xywh"
|
    def infer(
        self,
        image: Any,
        class_agnostic_nms: bool = DEFAULT_CLASS_AGNOSTIC_NMS,
        confidence: float = DEFAULT_CONFIDENCE,
        disable_preproc_auto_orient: bool = False,
        disable_preproc_contrast: bool = False,
        disable_preproc_grayscale: bool = False,
        disable_preproc_static_crop: bool = False,
        iou_threshold: float = DEFAULT_IOU_THRESH,
        fix_batch_size: bool = False,
        max_candidates: int = DEFAULT_MAX_CANDIDATES,
        max_detections: int = DEFAUlT_MAX_DETECTIONS,
        return_image_dims: bool = False,
        **kwargs,
    ) -> Any:
        """Runs object detection inference on one or more images and returns the detections.

        Args:
            image (Any): The input image or a list of images to process.
            class_agnostic_nms (bool, optional): Whether to use class-agnostic non-maximum suppression. Defaults to False.
            confidence (float, optional): Confidence threshold for predictions. Defaults to 0.5.
            disable_preproc_auto_orient (bool, optional): If True, the auto-orient preprocessing step is disabled for this call. Defaults to False.
            disable_preproc_contrast (bool, optional): If True, the contrast preprocessing step is disabled for this call. Defaults to False.
            disable_preproc_grayscale (bool, optional): If True, the grayscale preprocessing step is disabled for this call. Defaults to False.
            disable_preproc_static_crop (bool, optional): If True, the static crop preprocessing step is disabled for this call. Defaults to False.
            iou_threshold (float, optional): IoU threshold for non-maximum suppression. Defaults to 0.5.
            fix_batch_size (bool, optional): If True, pad the batch to the model's fixed batch size. Useful when the model requires a fixed batch size. Defaults to False.
            max_candidates (int, optional): Maximum number of candidate detections. Defaults to 3000.
            max_detections (int, optional): Maximum number of detections kept after non-maximum suppression. Defaults to 300.
            return_image_dims (bool, optional): Whether to return the dimensions of the processed images along with the predictions. Defaults to False.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            Union[List[ObjectDetectionInferenceResponse], ObjectDetectionInferenceResponse]: One or multiple object detection inference responses, depending on the number of processed images. Each response contains a list of predictions. If `return_image_dims` is True, a tuple of predictions and image dimensions is returned.

        Raises:
            ValueError: If batching is not enabled for the model and more than one image is passed for processing.
        """
        return super().infer(
            image,
            class_agnostic_nms=class_agnostic_nms,
            confidence=confidence,
            disable_preproc_auto_orient=disable_preproc_auto_orient,
            disable_preproc_contrast=disable_preproc_contrast,
            disable_preproc_grayscale=disable_preproc_grayscale,
            disable_preproc_static_crop=disable_preproc_static_crop,
            iou_threshold=iou_threshold,
            fix_batch_size=fix_batch_size,
            max_candidates=max_candidates,
            max_detections=max_detections,
            return_image_dims=return_image_dims,
            **kwargs,
        )
|
    def make_response(
        self,
        predictions: List[List[List[float]]],
        img_dims: List[Tuple[int, int]],
        class_filter: Optional[List[str]] = None,
        *args,
        **kwargs,
    ) -> List[ObjectDetectionInferenceResponse]:
        """Constructs object detection response objects based on predictions.

        Args:
            predictions (List[List[List[float]]]): The list of predictions, one list of boxes per image in the batch.
            img_dims (List[Tuple[int, int]]): Dimensions of the images.
            class_filter (Optional[List[str]]): A list of class names to keep, if provided.

        Returns:
            List[ObjectDetectionInferenceResponse]: A list of response objects containing object detection predictions.
        """
|
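        # Some callers pass the whole preprocessing metadata mapping here
        # instead of the bare dimensions list; unwrap it if so.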
        if isinstance(img_dims, dict) and "img_dims" in img_dims:
            img_dims = img_dims["img_dims"]
|
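        # Drop any trailing predictions produced by batch padding (see `preprocess`).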
        predictions = predictions[: len(img_dims)]
|
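        # Raw predictions are corner-format boxes (x1, y1, x2, y2) with the
        # confidence at index 4 and the class id at index 6; responses use
        # center-based x/y with width/height.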
        responses = [
            ObjectDetectionInferenceResponse(
                predictions=[
                    ObjectDetectionPrediction(
                        **{
                            "x": (pred[0] + pred[2]) / 2,
                            "y": (pred[1] + pred[3]) / 2,
                            "width": pred[2] - pred[0],
                            "height": pred[3] - pred[1],
                            "confidence": pred[4],
                            "class": self.class_names[int(pred[6])],
                            "class_id": int(pred[6]),
                        }
                    )
                    for pred in batch_predictions
                    if not class_filter
                    or self.class_names[int(pred[6])] in class_filter
                ],
                image=InferenceResponseImage(
                    width=img_dims[ind][1], height=img_dims[ind][0]
                ),
            )
            for ind, batch_predictions in enumerate(predictions)
        ]
        return responses
|
    def postprocess(
        self,
        predictions: Tuple[np.ndarray, ...],
        preproc_return_metadata: PreprocessReturnMetadata,
        class_agnostic_nms: bool = DEFAULT_CLASS_AGNOSTIC_NMS,
        confidence: float = DEFAULT_CONFIDENCE,
        iou_threshold: float = DEFAULT_IOU_THRESH,
        max_candidates: int = DEFAULT_MAX_CANDIDATES,
        max_detections: int = DEFAUlT_MAX_DETECTIONS,
        return_image_dims: bool = False,
        **kwargs,
    ) -> List[ObjectDetectionInferenceResponse]:
        """Postprocesses the object detection predictions.

        Args:
            predictions (Tuple[np.ndarray, ...]): Raw predictions from the model.
            preproc_return_metadata (PreprocessReturnMetadata): Metadata from preprocessing, including the original image dimensions.
            class_agnostic_nms (bool): Whether to apply class-agnostic non-max suppression. Default is False.
            confidence (float): Confidence threshold for filtering detections. Default is 0.5.
            iou_threshold (float): IoU threshold for non-max suppression. Default is 0.5.
            max_candidates (int): Maximum number of candidate detections. Default is 3000.
            max_detections (int): Maximum number of final detections. Default is 300.

        Returns:
            List[ObjectDetectionInferenceResponse]: The post-processed predictions.
        """
|
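        # `predict` returns a tuple of outputs; the first element holds the
        # raw detections.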
        predictions = predictions[0]

        predictions = w_np_non_max_suppression(
            predictions,
            conf_thresh=confidence,
            iou_thresh=iou_threshold,
            class_agnostic=class_agnostic_nms,
            max_detections=max_detections,
            max_candidate_detections=max_candidates,
            box_format=self.box_format,
        )
|
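        # Rescale boxes from the model's input size back to each original
        # image's coordinate space, accounting for the resize method and any
        # static crop applied during preprocessing.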
        infer_shape = (self.img_size_h, self.img_size_w)
        img_dims = preproc_return_metadata["img_dims"]
        predictions = post_process_bboxes(
            predictions,
            infer_shape,
            img_dims,
            self.preproc,
            resize_method=self.resize_method,
            disable_preproc_static_crop=preproc_return_metadata[
                "disable_preproc_static_crop"
            ],
        )
        return self.make_response(predictions, img_dims, **kwargs)
|
    def preprocess(
        self,
        image: Any,
        disable_preproc_auto_orient: bool = False,
        disable_preproc_contrast: bool = False,
        disable_preproc_grayscale: bool = False,
        disable_preproc_static_crop: bool = False,
        fix_batch_size: bool = False,
        **kwargs,
    ) -> Tuple[np.ndarray, PreprocessReturnMetadata]:
        """Preprocesses an object detection inference request.

        Args:
            image (Any): The image or list of images to preprocess.
            disable_preproc_auto_orient (bool, optional): If True, disable auto-orient for this call. Defaults to False.
            disable_preproc_contrast (bool, optional): If True, disable contrast adjustment for this call. Defaults to False.
            disable_preproc_grayscale (bool, optional): If True, disable grayscale conversion for this call. Defaults to False.
            disable_preproc_static_crop (bool, optional): If True, disable static crop for this call. Defaults to False.
            fix_batch_size (bool, optional): If True, pad the batch to the model's fixed batch size. Defaults to False.

        Returns:
            Tuple[np.ndarray, PreprocessReturnMetadata]: Preprocessed image tensor and preprocessing metadata, including the original image dimensions.
        """
        img_in, img_dims = self.load_image(
            image,
            disable_preproc_auto_orient=disable_preproc_auto_orient,
            disable_preproc_contrast=disable_preproc_contrast,
            disable_preproc_grayscale=disable_preproc_grayscale,
            disable_preproc_static_crop=disable_preproc_static_crop,
        )
|
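        # Scale pixel values from [0, 255] to [0, 1].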
        img_in /= 255.0
|
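        # If a fixed batch size is required, pad the batch with zero-filled
        # images up to MAX_BATCH_SIZE; the extra predictions are discarded in
        # `make_response`.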
        if self.batching_enabled:
            batch_padding = 0
            if FIX_BATCH_SIZE or fix_batch_size:
                if MAX_BATCH_SIZE == float("inf"):
                    logger.warning(
                        "Requested fix_batch_size but MAX_BATCH_SIZE is not set. Using dynamic batching."
                    )
                    batch_padding = 0
                else:
                    batch_padding = MAX_BATCH_SIZE - img_in.shape[0]
                    if batch_padding < 0:
                        raise ValueError(
                            f"Requested fix_batch_size but passed in {img_in.shape[0]} images "
                            f"when the model's batch size is {MAX_BATCH_SIZE}\n"
                            f"Consider turning off fix_batch_size, changing `MAX_BATCH_SIZE` in "
                            f"your inference server config, or passing at most {MAX_BATCH_SIZE} images at a time"
                        )
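            # Pad height and width up to a multiple of 32 so spatial dims match
            # the model's stride (assumes NCHW layout: axes 2 and 3 are H and W).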
            height_remainder = img_in.shape[2] % 32
            if height_remainder > 0:
                height_padding = 32 - height_remainder
            else:
                height_padding = 0
            width_remainder = img_in.shape[3] % 32
            if width_remainder > 0:
                width_padding = 32 - width_remainder
            else:
                width_padding = 0
            img_in = np.pad(
                img_in,
                ((0, batch_padding), (0, 0), (0, height_padding), (0, width_padding)),
                "constant",
            )
|
        return img_in, PreprocessReturnMetadata(
            {
                "img_dims": img_dims,
                "disable_preproc_static_crop": disable_preproc_static_crop,
            }
        )
|
    def predict(self, img_in: np.ndarray, **kwargs) -> Tuple[np.ndarray]:
        """Runs inference on the ONNX model.

        Args:
            img_in (np.ndarray): The preprocessed image(s) to run inference on.

        Returns:
            Tuple[np.ndarray]: The ONNX model predictions.

        Raises:
            NotImplementedError: This method must be implemented by a subclass.
        """
        raise NotImplementedError("predict must be implemented by a subclass")
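
    # A concrete subclass typically wraps a single ONNX Runtime call. A minimal
    # sketch (the `onnx_session` and `input_name` attribute names are
    # assumptions for illustration, not a guaranteed interface):
    #
    #     def predict(self, img_in: np.ndarray, **kwargs) -> Tuple[np.ndarray]:
    #         predictions = self.onnx_session.run(None, {self.input_name: img_in})
    #         return (predictions[0],)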
|
    def validate_model_classes(self) -> None:
        output_shape = self.get_model_output_shape()
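        # For a detection head, output_shape[2] is the length of each per-box
        # prediction vector (4 box coordinates + 1 confidence + one score per
        # class); masks=0 because plain detection predicts no mask coefficients.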
        num_classes = get_num_classes_from_model_prediction_shape(
            output_shape[2], masks=0
        )
        if num_classes != self.num_classes:
            raise ValueError(
                f"Number of classes in model ({num_classes}) does not match the number of classes in the environment ({self.num_classes})"
            )
|
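
# Usage sketch: this base class is not instantiated directly; a concrete
# subclass implementing `predict` is used instead. The subclass name and model
# id below are illustrative assumptions, not part of this module:
#
#     model = SomeOnnxObjectDetectionModel(model_id="some-project/1")
#     responses = model.infer(image, confidence=0.4, iou_threshold=0.5)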