# Copyright (C) 2021-2024, Mindee. # This program is licensed under the Apache License 2.0. # See LICENSE or go to for full license details. from copy import deepcopy from math import ceil from typing import List, Optional, Tuple, Union import cv2 import numpy as np from .common_types import BoundingBox, Polygon4P __all__ = [ "bbox_to_polygon", "polygon_to_bbox", "resolve_enclosing_bbox", "resolve_enclosing_rbbox", "rotate_boxes", "compute_expanded_shape", "rotate_image", "estimate_page_angle", "convert_to_relative_coords", "rotate_abs_geoms", "extract_crops", "extract_rcrops", ] def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P: """Convert a bounding box to a polygon Args: ---- bbox: a bounding box Returns: ------- a polygon """ return bbox[0], (bbox[1][0], bbox[0][1]), (bbox[0][0], bbox[1][1]), bbox[1] def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox: """Convert a polygon to a bounding box Args: ---- polygon: a polygon Returns: ------- a bounding box """ x, y = zip(*polygon) return (min(x), min(y)), (max(x), max(y)) def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]: """Compute enclosing bbox either from: Args: ---- bboxes: boxes in one of the following formats: - an array of boxes: (*, 5), where boxes have this shape: (xmin, ymin, xmax, ymax, score) - a list of BoundingBox Returns: ------- a (1, 5) array (enclosing boxarray), or a BoundingBox """ if isinstance(bboxes, np.ndarray): xmin, ymin, xmax, ymax, score = np.split(bboxes, 5, axis=1) return np.array([xmin.min(), ymin.min(), xmax.max(), ymax.max(), score.mean()]) else: x, y = zip(*[point for box in bboxes for point in box]) return (min(x), min(y)), (max(x), max(y)) def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray: """Compute enclosing rotated bbox either from: Args: ---- rbboxes: boxes in one of the following formats: - an array of boxes: (*, 5), where boxes have this shape: (xmin, ymin, xmax, ymax, score) - a list of BoundingBox intermed_size: size of the intermediate image Returns: ------- a (1, 5) array (enclosing boxarray), or a BoundingBox """ cloud: np.ndarray = np.concatenate(rbboxes, axis=0) # Convert to absolute for minAreaRect cloud *= intermed_size rect = cv2.minAreaRect(cloud.astype(np.int32)) return cv2.boxPoints(rect) / intermed_size # type: ignore[operator] def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray: """Rotate points counter-clockwise. Args: ---- points: array of size (N, 2) angle: angle between -90 and +90 degrees Returns: ------- Rotated points """ angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions rotation_mat = np.array( [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=points.dtype ) return np.matmul(points, rotation_mat.T) def compute_expanded_shape(img_shape: Tuple[int, int], angle: float) -> Tuple[int, int]: """Compute the shape of an expanded rotated image Args: ---- img_shape: the height and width of the image angle: angle between -90 and +90 degrees Returns: ------- the height and width of the rotated image """ points: np.ndarray = np.array([ [img_shape[1] / 2, img_shape[0] / 2], [-img_shape[1] / 2, img_shape[0] / 2], ]) rotated_points = rotate_abs_points(points, angle) wh_shape = 2 * np.abs(rotated_points).max(axis=0) return wh_shape[1], wh_shape[0] def rotate_abs_geoms( geoms: np.ndarray, angle: float, img_shape: Tuple[int, int], expand: bool = True, ) -> np.ndarray: """Rotate a batch of bounding boxes or polygons by an angle around the image center. Args: ---- geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes angle: anti-clockwise rotation angle in degrees img_shape: the height and width of the image expand: whether the image should be padded to avoid information loss Returns: ------- A batch of rotated polygons (N, 4, 2) """ # Switch to polygons polys = ( np.stack([geoms[:, [0, 1]], geoms[:, [2, 1]], geoms[:, [2, 3]], geoms[:, [0, 3]]], axis=1) if geoms.ndim == 2 else geoms ) polys = polys.astype(np.float32) # Switch to image center as referential polys[..., 0] -= img_shape[1] / 2 polys[..., 1] = img_shape[0] / 2 - polys[..., 1] # Rotated them around image center rotated_polys = rotate_abs_points(polys.reshape(-1, 2), angle).reshape(-1, 4, 2) # Switch back to top-left corner as referential target_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape # Clip coords to fit since there is no expansion rotated_polys[..., 0] = (rotated_polys[..., 0] + target_shape[1] / 2).clip(0, target_shape[1]) rotated_polys[..., 1] = (target_shape[0] / 2 - rotated_polys[..., 1]).clip(0, target_shape[0]) return rotated_polys def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray: """Remaps a batch of rotated locpred (N, 4, 2) expressed for an origin_shape to a destination_shape. This does not impact the absolute shape of the boxes, but allow to calculate the new relative RotatedBbox coordinates after a resizing of the image. Args: ---- loc_preds: (N, 4, 2) array of RELATIVE loc_preds orig_shape: shape of the origin image dest_shape: shape of the destination image Returns: ------- A batch of rotated loc_preds (N, 4, 2) expressed in the destination referencial """ if len(dest_shape) != 2: raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}") if len(orig_shape) != 2: raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}") orig_height, orig_width = orig_shape dest_height, dest_width = dest_shape mboxes = loc_preds.copy() mboxes[:, :, 0] = ((loc_preds[:, :, 0] * orig_width) + (dest_width - orig_width) / 2) / dest_width mboxes[:, :, 1] = ((loc_preds[:, :, 1] * orig_height) + (dest_height - orig_height) / 2) / dest_height return mboxes def rotate_boxes( loc_preds: np.ndarray, angle: float, orig_shape: Tuple[int, int], min_angle: float = 1.0, target_shape: Optional[Tuple[int, int]] = None, ) -> np.ndarray: """Rotate a batch of straight bounding boxes (xmin, ymin, xmax, ymax, c) or rotated bounding boxes (4, 2) of an angle, if angle > min_angle, around the center of the page. If target_shape is specified, the boxes are remapped to the target shape after the rotation. This is done to remove the padding that is created by rotate_page(expand=True) Args: ---- loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes angle: angle between -90 and +90 degrees orig_shape: shape of the origin image min_angle: minimum angle to rotate boxes target_shape: shape of the destination image Returns: ------- A batch of rotated boxes (N, 4, 2): or a batch of straight bounding boxes """ # Change format of the boxes to rotated boxes _boxes = loc_preds.copy() if _boxes.ndim == 2: _boxes = np.stack( [ _boxes[:, [0, 1]], _boxes[:, [2, 1]], _boxes[:, [2, 3]], _boxes[:, [0, 3]], ], axis=1, ) # If small angle, return boxes (no rotation) if abs(angle) < min_angle or abs(angle) > 90 - min_angle: return _boxes # Compute rotation matrix angle_rad = angle * np.pi / 180.0 # compute radian angle for np functions rotation_mat = np.array( [[np.cos(angle_rad), -np.sin(angle_rad)], [np.sin(angle_rad), np.cos(angle_rad)]], dtype=_boxes.dtype ) # Rotate absolute points points: np.ndarray = np.stack((_boxes[:, :, 0] * orig_shape[1], _boxes[:, :, 1] * orig_shape[0]), axis=-1) image_center = (orig_shape[1] / 2, orig_shape[0] / 2) rotated_points = image_center + np.matmul(points - image_center, rotation_mat) rotated_boxes: np.ndarray = np.stack( (rotated_points[:, :, 0] / orig_shape[1], rotated_points[:, :, 1] / orig_shape[0]), axis=-1 ) # Apply a mask if requested if target_shape is not None: rotated_boxes = remap_boxes(rotated_boxes, orig_shape=orig_shape, dest_shape=target_shape) return rotated_boxes def rotate_image( image: np.ndarray, angle: float, expand: bool = False, preserve_origin_shape: bool = False, ) -> np.ndarray: """Rotate an image counterclockwise by an given angle. Args: ---- image: numpy tensor to rotate angle: rotation angle in degrees, between -90 and +90 expand: whether the image should be padded before the rotation preserve_origin_shape: if expand is set to True, resizes the final output to the original image size Returns: ------- Rotated array, padded by 0 by default. """ # Compute the expanded padding exp_img: np.ndarray if expand: exp_shape = compute_expanded_shape(image.shape[:2], angle) # type: ignore[arg-type] h_pad, w_pad = ( int(max(0, ceil(exp_shape[0] - image.shape[0]))), int(max(0, ceil(exp_shape[1] - image.shape[1]))), ) exp_img = np.pad(image, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) else: exp_img = image height, width = exp_img.shape[:2] rot_mat = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1.0) rot_img = cv2.warpAffine(exp_img, rot_mat, (width, height)) if expand: # Pad to get the same aspect ratio if (image.shape[0] / image.shape[1]) != (rot_img.shape[0] / rot_img.shape[1]): # Pad width if (rot_img.shape[0] / rot_img.shape[1]) > (image.shape[0] / image.shape[1]): h_pad, w_pad = 0, int(rot_img.shape[0] * image.shape[1] / image.shape[0] - rot_img.shape[1]) # Pad height else: h_pad, w_pad = int(rot_img.shape[1] * image.shape[0] / image.shape[1] - rot_img.shape[0]), 0 rot_img = np.pad(rot_img, ((h_pad // 2, h_pad - h_pad // 2), (w_pad // 2, w_pad - w_pad // 2), (0, 0))) if preserve_origin_shape: # rescale rot_img = cv2.resize(rot_img, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR) return rot_img def estimate_page_angle(polys: np.ndarray) -> float: """Takes a batch of rotated previously ORIENTED polys (N, 4, 2) (rectified by the classifier) and return the estimated angle ccw in degrees """ # Compute mean left points and mean right point with respect to the reading direction (oriented polygon) xleft = polys[:, 0, 0] + polys[:, 3, 0] yleft = polys[:, 0, 1] + polys[:, 3, 1] xright = polys[:, 1, 0] + polys[:, 2, 0] yright = polys[:, 1, 1] + polys[:, 2, 1] with np.errstate(divide="raise", invalid="raise"): try: return float( np.median(np.arctan((yleft - yright) / (xright - xleft)) * 180 / np.pi) # Y axis from top to bottom! ) except FloatingPointError: return 0.0 def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray: """Convert a geometry to relative coordinates Args: ---- geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4) img_shape: the height and width of the image Returns: ------- the updated geometry """ # Polygon if geoms.ndim == 3 and geoms.shape[1:] == (4, 2): polygons: np.ndarray = np.empty(geoms.shape, dtype=np.float32) polygons[..., 0] = geoms[..., 0] / img_shape[1] polygons[..., 1] = geoms[..., 1] / img_shape[0] return polygons.clip(0, 1) if geoms.ndim == 2 and geoms.shape[1] == 4: boxes: np.ndarray = np.empty(geoms.shape, dtype=np.float32) boxes[:, ::2] = geoms[:, ::2] / img_shape[1] boxes[:, 1::2] = geoms[:, 1::2] / img_shape[0] return boxes.clip(0, 1) raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}") def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]: """Created cropped images from list of bounding boxes Args: ---- img: input image boxes: bounding boxes of shape (N, 4) where N is the number of boxes, and the relative coordinates (xmin, ymin, xmax, ymax) channels_last: whether the channel dimensions is the last one instead of the last one Returns: ------- list of cropped images """ if boxes.shape[0] == 0: return [] if boxes.shape[1] != 4: raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)") # Project relative coordinates _boxes = boxes.copy() h, w = img.shape[:2] if channels_last else img.shape[-2:] if not np.issubdtype(_boxes.dtype, np.integer): _boxes[:, [0, 2]] *= w _boxes[:, [1, 3]] *= h _boxes = _boxes.round().astype(int) # Add last index _boxes[2:] += 1 if channels_last: return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes]) return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes]) def extract_rcrops( img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True ) -> List[np.ndarray]: """Created cropped images from list of rotated bounding boxes Args: ---- img: input image polys: bounding boxes of shape (N, 4, 2) dtype: target data type of bounding boxes channels_last: whether the channel dimensions is the last one instead of the last one Returns: ------- list of cropped images """ if polys.shape[0] == 0: return [] if polys.shape[1:] != (4, 2): raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)") # Project relative coordinates _boxes = polys.copy() height, width = img.shape[:2] if channels_last else img.shape[-2:] if not np.issubdtype(_boxes.dtype, np.integer): _boxes[:, :, 0] *= width _boxes[:, :, 1] *= height src_pts = _boxes[:, :3].astype(np.float32) # Preserve size d1 = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1) d2 = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1) # (N, 3, 2) dst_pts = np.zeros((_boxes.shape[0], 3, 2), dtype=dtype) dst_pts[:, 1, 0] = dst_pts[:, 2, 0] = d1 - 1 dst_pts[:, 2, 1] = d2 - 1 # Use a warp transformation to extract the crop crops = [ cv2.warpAffine( img if channels_last else img.transpose(1, 2, 0), # Transformation matrix cv2.getAffineTransform(src_pts[idx], dst_pts[idx]), (int(d1[idx]), int(d2[idx])), ) for idx in range(_boxes.shape[0]) ] return crops