# NOTE: extraction residue, not part of the module: "Spaces: Runtime error"
# Copyright (C) 2021-2024, Mindee. | |
# This program is licensed under the Apache License 2.0. | |
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details. | |
from copy import deepcopy | |
from math import ceil | |
from typing import List, Optional, Tuple, Union | |
import cv2 | |
import numpy as np | |
from .common_types import BoundingBox, Polygon4P | |
# Names exported by a wildcard import of this module. Helpers defined in this
# file but not listed here (e.g. `rotate_abs_points`, `remap_boxes`) are left
# out of the wildcard export.
__all__ = [
    "bbox_to_polygon",
    "polygon_to_bbox",
    "resolve_enclosing_bbox",
    "resolve_enclosing_rbbox",
    "rotate_boxes",
    "compute_expanded_shape",
    "rotate_image",
    "estimate_page_angle",
    "convert_to_relative_coords",
    "rotate_abs_geoms",
    "extract_crops",
    "extract_rcrops",
]
def bbox_to_polygon(bbox: BoundingBox) -> Polygon4P:
    """Expand a two-corner bounding box into its four corner points

    Args:
    ----
        bbox: a pair of points ((xmin, ymin), (xmax, ymax))

    Returns:
    -------
        the four corners, in the order: first corner, (xmax, ymin), (xmin, ymax), second corner
    """
    first_corner = bbox[0]
    second_corner = bbox[1]
    # The two missing corners mix one coordinate from each input point
    mixed_a = (second_corner[0], first_corner[1])
    mixed_b = (first_corner[0], second_corner[1])
    return first_corner, mixed_a, mixed_b, second_corner
def polygon_to_bbox(polygon: Polygon4P) -> BoundingBox:
    """Compute the axis-aligned box that encloses a polygon

    Args:
    ----
        polygon: a sequence of (x, y) points

    Returns:
    -------
        the enclosing box as ((xmin, ymin), (xmax, ymax))
    """
    # Transpose the list of points into one tuple of abscissas and one of ordinates
    xs, ys = zip(*polygon)
    lower_corner = (min(xs), min(ys))
    upper_corner = (max(xs), max(ys))
    return lower_corner, upper_corner
def resolve_enclosing_bbox(bboxes: Union[List[BoundingBox], np.ndarray]) -> Union[BoundingBox, np.ndarray]:
    """Compute the box enclosing a collection of straight boxes

    Args:
    ----
        bboxes: boxes in one of the following formats:

            - an array of boxes: (*, 5), where boxes have this shape:
              (xmin, ymin, xmax, ymax, score)

            - a list of BoundingBox

    Returns:
    -------
        for an array input, a flat array (xmin, ymin, xmax, ymax, mean score);
        for a list input, a BoundingBox ((xmin, ymin), (xmax, ymax))
    """
    if not isinstance(bboxes, np.ndarray):
        # Flatten every corner of every box, then separate abscissas from ordinates
        xs, ys = zip(*(corner for box in bboxes for corner in box))
        return (min(xs), min(ys)), (max(xs), max(ys))

    # One column per field; the enclosing score is the average of the input scores
    x_lo, y_lo, x_hi, y_hi, scores = np.split(bboxes, 5, axis=1)
    return np.array([x_lo.min(), y_lo.min(), x_hi.max(), y_hi.max(), scores.mean()])
def resolve_enclosing_rbbox(rbboxes: List[np.ndarray], intermed_size: int = 1024) -> np.ndarray:
    """Compute the minimum-area rotated rectangle enclosing a set of rotated boxes

    Args:
    ----
        rbboxes: list of point arrays, concatenated along their first axis
            (presumably (4, 2) polygons in relative coordinates - confirm against callers)
        intermed_size: scale applied to the points before they are cast to integers
            for `cv2.minAreaRect`; the result is divided by the same factor

    Returns:
    -------
        the corner points of the enclosing rotated rectangle, as returned by
        `cv2.boxPoints` and scaled back by `intermed_size`
    """
    # Merge the vertices of all boxes into a single point cloud (fresh array, inputs untouched)
    point_cloud: np.ndarray = np.concatenate(rbboxes, axis=0)
    # Scale up so that the integer cast below keeps enough resolution
    point_cloud *= intermed_size
    int_cloud = point_cloud.astype(np.int32)
    enclosing_rect = cv2.minAreaRect(int_cloud)
    corners = cv2.boxPoints(enclosing_rect)
    return corners / intermed_size  # type: ignore[operator]
def rotate_abs_points(points: np.ndarray, angle: float = 0.0) -> np.ndarray:
    """Apply a 2D rotation about the origin to a set of points.

    Args:
    ----
        points: array of size (N, 2)
        angle: rotation angle in degrees (counter-clockwise in a frame where y points up)

    Returns:
    -------
        the rotated points, array of size (N, 2)
    """
    theta = angle * np.pi / 180.0  # degrees -> radians
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    # The matrix is built in the dtype of the input points
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]], dtype=points.dtype)
    # Points are row vectors, hence the multiplication by the transposed matrix
    return points @ rotation.T
def compute_expanded_shape(img_shape: Tuple[int, int], angle: float) -> Tuple[int, int]:
    """Compute the size of the canvas needed to hold an image once rotated

    Args:
    ----
        img_shape: the height and width of the image
        angle: rotation angle in degrees

    Returns:
    -------
        the height and width of the rotated image
    """
    half_w = img_shape[1] / 2
    half_h = img_shape[0] / 2
    # Two adjacent corners, expressed relative to the image center; the other
    # two are their opposites and share the same absolute coordinates
    corners: np.ndarray = np.array([
        [half_w, half_h],
        [-half_w, half_h],
    ])
    rotated_corners = rotate_abs_points(corners, angle)
    # Largest absolute extent on each axis, doubled to get the full (width, height)
    extent_wh = 2 * np.abs(rotated_corners).max(axis=0)
    return extent_wh[1], extent_wh[0]
def rotate_abs_geoms(
    geoms: np.ndarray,
    angle: float,
    img_shape: Tuple[int, int],
    expand: bool = True,
) -> np.ndarray:
    """Rotate a batch of straight boxes or polygons around the center of the image.

    Args:
    ----
        geoms: (N, 4) or (N, 4, 2) array of ABSOLUTE coordinate boxes
        angle: anti-clockwise rotation angle in degrees
        img_shape: the height and width of the image
        expand: whether the target canvas is the expanded shape (True) or the original one (False)

    Returns:
    -------
        A batch of rotated polygons (N, 4, 2), clipped to the target canvas
    """
    if geoms.ndim == 2:
        # Straight boxes (xmin, ymin, xmax, ymax) -> four corners each
        corner_columns = ([0, 1], [2, 1], [2, 3], [0, 3])
        polys = np.stack([geoms[:, cols] for cols in corner_columns], axis=1)
    else:
        polys = geoms
    polys = polys.astype(np.float32)

    # Express the points relative to the image center, with the y axis pointing up
    polys[..., 0] -= img_shape[1] / 2
    polys[..., 1] = img_shape[0] / 2 - polys[..., 1]

    rotated = rotate_abs_points(polys.reshape(-1, 2), angle).reshape(-1, 4, 2)

    # Back to a top-left origin in the target canvas; coordinates are clipped to its bounds
    canvas_shape = compute_expanded_shape(img_shape, angle) if expand else img_shape
    canvas_h, canvas_w = canvas_shape[0], canvas_shape[1]
    rotated[..., 0] = (rotated[..., 0] + canvas_w / 2).clip(0, canvas_w)
    rotated[..., 1] = (canvas_h / 2 - rotated[..., 1]).clip(0, canvas_h)
    return rotated
def remap_boxes(loc_preds: np.ndarray, orig_shape: Tuple[int, int], dest_shape: Tuple[int, int]) -> np.ndarray:
    """Re-express relative polygons of an image inside a destination canvas in which that image is centered.

    The absolute size of the boxes is unchanged; only their relative coordinates are recomputed
    for the destination shape.

    Args:
    ----
        loc_preds: (N, 4, 2) array of RELATIVE loc_preds
        orig_shape: shape (height, width) of the origin image
        dest_shape: shape (height, width) of the destination image

    Returns:
    -------
        A batch of loc_preds (N, 4, 2) expressed relative to the destination shape

    Raises:
    ------
        ValueError: if either shape does not have exactly 2 entries
    """
    if len(dest_shape) != 2:
        raise ValueError(f"Mask length should be 2, was found at: {len(dest_shape)}")
    if len(orig_shape) != 2:
        raise ValueError(f"Image_shape length should be 2, was found at: {len(orig_shape)}")

    orig_height, orig_width = orig_shape
    dest_height, dest_width = dest_shape
    remapped = loc_preds.copy()
    # Coordinate 0 is x (scaled by widths), coordinate 1 is y (scaled by heights)
    for coord, (orig_len, dest_len) in enumerate(((orig_width, dest_width), (orig_height, dest_height))):
        # Absolute position in the origin image, shifted by half the size difference, then normalised
        remapped[:, :, coord] = ((loc_preds[:, :, coord] * orig_len) + (dest_len - orig_len) / 2) / dest_len
    return remapped
def rotate_boxes(
    loc_preds: np.ndarray,
    angle: float,
    orig_shape: Tuple[int, int],
    min_angle: float = 1.0,
    target_shape: Optional[Tuple[int, int]] = None,
) -> np.ndarray:
    """Rotate a batch of relative boxes around the center of the page.

    Straight boxes (xmin, ymin, xmax, ymax, c) are first converted to 4-point polygons. The rotation
    is skipped when the absolute angle is below `min_angle` or above `90 - min_angle`.
    If target_shape is specified, the rotated boxes are remapped to that shape afterwards
    (e.g. to account for the padding added when the page image is rotated with expansion).

    Args:
    ----
        loc_preds: (N, 5) or (N, 4, 2) array of RELATIVE boxes
        angle: angle between -90 and +90 degrees
        orig_shape: shape (height, width) of the origin image
        min_angle: minimum angle to rotate boxes
        target_shape: shape (height, width) of the destination image

    Returns:
    -------
        A batch of polygons (N, 4, 2), rotated unless the angle falls in the skipped range
    """
    polys = loc_preds.copy()
    if polys.ndim == 2:
        # (xmin, ymin, xmax, ymax, ...) -> four corners, the extra columns are dropped
        corner_columns = ([0, 1], [2, 1], [2, 3], [0, 3])
        polys = np.stack([polys[:, cols] for cols in corner_columns], axis=1)

    magnitude = abs(angle)
    if magnitude < min_angle or magnitude > 90 - min_angle:
        # Angle considered negligible (or too close to a quarter turn): no rotation applied
        return polys

    theta = angle * np.pi / 180.0  # degrees -> radians
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t], [sin_t, cos_t]], dtype=polys.dtype)

    page_h, page_w = orig_shape[0], orig_shape[1]
    # The rotation is performed on absolute coordinates, around the page center
    abs_points: np.ndarray = np.stack((polys[:, :, 0] * page_w, polys[:, :, 1] * page_h), axis=-1)
    center = (page_w / 2, page_h / 2)
    rotated_abs = center + np.matmul(abs_points - center, rotation)
    rotated_rel: np.ndarray = np.stack((rotated_abs[:, :, 0] / page_w, rotated_abs[:, :, 1] / page_h), axis=-1)

    if target_shape is None:
        return rotated_rel
    return remap_boxes(rotated_rel, orig_shape=orig_shape, dest_shape=target_shape)
def rotate_image(
    image: np.ndarray,
    angle: float,
    expand: bool = False,
    preserve_origin_shape: bool = False,
) -> np.ndarray:
    """Rotate an image counterclockwise by a given angle.

    Args:
    ----
        image: numpy tensor to rotate
        angle: rotation angle in degrees, between -90 and +90
        expand: whether the image should be padded before the rotation
        preserve_origin_shape: if expand is set to True, resizes the final output to the original image size

    Returns:
    -------
        Rotated array, padded by 0 by default.
    """
    src_h, src_w = image.shape[0], image.shape[1]

    canvas: np.ndarray = image
    if expand:
        # Pad symmetrically up to the shape the rotated content will occupy
        exp_shape = compute_expanded_shape(image.shape[:2], angle)  # type: ignore[arg-type]
        pad_h = int(max(0, ceil(exp_shape[0] - src_h)))
        pad_w = int(max(0, ceil(exp_shape[1] - src_w)))
        canvas = np.pad(image, ((pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2), (0, 0)))

    canvas_h, canvas_w = canvas.shape[:2]
    rot_mat = cv2.getRotationMatrix2D((canvas_w / 2, canvas_h / 2), angle, 1.0)
    rotated = cv2.warpAffine(canvas, rot_mat, (canvas_w, canvas_h))

    if not expand:
        return rotated

    # Pad one dimension so that the output has the same aspect ratio as the input
    src_ratio = src_h / src_w
    rot_ratio = rotated.shape[0] / rotated.shape[1]
    if src_ratio != rot_ratio:
        if rot_ratio > src_ratio:
            # Output is too tall for its width: widen it
            pad_h, pad_w = 0, int(rotated.shape[0] * src_w / src_h - rotated.shape[1])
        else:
            # Output is too wide for its height: heighten it
            pad_h, pad_w = int(rotated.shape[1] * src_h / src_w - rotated.shape[0]), 0
        rotated = np.pad(rotated, ((pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2), (0, 0)))

    if preserve_origin_shape:
        # cv2.resize expects (width, height), hence the reversed spatial shape
        rotated = cv2.resize(rotated, image.shape[:-1][::-1], interpolation=cv2.INTER_LINEAR)
    return rotated
def estimate_page_angle(polys: np.ndarray) -> float:
    """Estimate the counter-clockwise angle (in degrees) of a page from a batch of ORIENTED polys (N, 4, 2)

    For each polygon, the direction from its left side (points 0 and 3) to its right side (points 1 and 2)
    gives an angle; the median over the batch is returned. If a floating point error occurs during the
    computation (e.g. a division by zero), 0.0 is returned.
    """
    # Sums of the two left / right vertices: the common factor cancels out in the ratio below
    left_x = polys[:, 0, 0] + polys[:, 3, 0]
    left_y = polys[:, 0, 1] + polys[:, 3, 1]
    right_x = polys[:, 1, 0] + polys[:, 2, 0]
    right_y = polys[:, 1, 1] + polys[:, 2, 1]
    # Turn numpy's divide / invalid warnings into exceptions so that they can be caught
    with np.errstate(divide="raise", invalid="raise"):
        try:
            # The image Y axis goes from top to bottom, hence left minus right on the ordinates
            slopes = (left_y - right_y) / (right_x - left_x)
            angles_deg = np.arctan(slopes) * 180 / np.pi
            return float(np.median(angles_deg))
        except FloatingPointError:
            return 0.0
def convert_to_relative_coords(geoms: np.ndarray, img_shape: Tuple[int, int]) -> np.ndarray:
    """Normalise absolute geometries by the image size

    Args:
    ----
        geoms: a set of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4)
        img_shape: the height and width of the image

    Returns:
    -------
        a float32 array of the same shape, with coordinates divided by the image size and clipped to [0, 1]

    Raises:
    ------
        ValueError: if `geoms` has neither of the supported shapes
    """
    is_polygons = geoms.ndim == 3 and geoms.shape[1:] == (4, 2)
    is_boxes = geoms.ndim == 2 and geoms.shape[1] == 4
    if not (is_polygons or is_boxes):
        raise ValueError(f"invalid format for arg `geoms`: {geoms.shape}")

    relative: np.ndarray = np.empty(geoms.shape, dtype=np.float32)
    if is_polygons:
        # Last axis holds (x, y)
        relative[..., 0] = geoms[..., 0] / img_shape[1]
        relative[..., 1] = geoms[..., 1] / img_shape[0]
    else:
        # Even columns are abscissas, odd columns are ordinates
        relative[:, ::2] = geoms[:, ::2] / img_shape[1]
        relative[:, 1::2] = geoms[:, 1::2] / img_shape[0]
    return relative.clip(0, 1)
def extract_crops(img: np.ndarray, boxes: np.ndarray, channels_last: bool = True) -> List[np.ndarray]:
    """Create cropped images from a list of straight bounding boxes

    Args:
    ----
        img: input image
        boxes: bounding boxes of shape (N, 4) where N is the number of boxes, in the order
            (xmin, ymin, xmax, ymax). Non-integer arrays are treated as relative coordinates and
            projected onto the image size; integer arrays are used as-is
        channels_last: whether the channel dimension is the last one instead of the first one

    Returns:
    -------
        list of cropped images (copies, independent from `img`)

    Raises:
    ------
        AssertionError: if `boxes` does not have 4 columns
    """
    if boxes.shape[0] == 0:
        return []
    if boxes.shape[1] != 4:
        raise AssertionError("boxes are expected to be relative and in order (xmin, ymin, xmax, ymax)")

    # Project relative coordinates
    _boxes = boxes.copy()
    h, w = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(_boxes.dtype, np.integer):
        _boxes[:, [0, 2]] *= w
        _boxes[:, [1, 3]] *= h
        _boxes = _boxes.round().astype(int)
        # Make xmax / ymax inclusive for EVERY box. The slice must select the last two
        # columns: slicing rows (`_boxes[2:]`) would leave the first two boxes untouched
        # and shift all four coordinates of the remaining ones.
        _boxes[:, 2:] += 1

    if channels_last:
        return deepcopy([img[box[1] : box[3], box[0] : box[2]] for box in _boxes])
    return deepcopy([img[:, box[1] : box[3], box[0] : box[2]] for box in _boxes])
def extract_rcrops(
    img: np.ndarray, polys: np.ndarray, dtype=np.float32, channels_last: bool = True
) -> List[np.ndarray]:
    """Create cropped images from a list of rotated bounding boxes

    Args:
    ----
        img: input image
        polys: bounding boxes of shape (N, 4, 2). Non-integer arrays are scaled by the image
            width / height before use; integer arrays are used as-is
        dtype: data type of the destination points used to build the affine transforms
        channels_last: whether the channel dimension is the last one instead of the first one

    Returns:
    -------
        list of cropped images

    Raises:
    ------
        AssertionError: if `polys` is not of shape (N, 4, 2)
    """
    if polys.shape[0] == 0:
        return []
    if polys.shape[1:] != (4, 2):
        raise AssertionError("polys are expected to be quadrilateral, of shape (N, 4, 2)")

    # Work on a copy so that the caller's array is left untouched by the projection
    abs_polys = polys.copy()
    height, width = img.shape[:2] if channels_last else img.shape[-2:]
    if not np.issubdtype(abs_polys.dtype, np.integer):
        abs_polys[:, :, 0] *= width
        abs_polys[:, :, 1] *= height

    # Only the first three vertices of each polygon are used to define the affine transform
    src_pts = abs_polys[:, :3].astype(np.float32)

    # Crop size: lengths of the edges between vertices 0-1 and 1-2
    crop_widths = np.linalg.norm(src_pts[:, 0] - src_pts[:, 1], axis=-1)
    crop_heights = np.linalg.norm(src_pts[:, 1] - src_pts[:, 2], axis=-1)

    # Destination triangle (N, 3, 2): (0, 0), (w - 1, 0), (w - 1, h - 1)
    dst_pts = np.zeros((abs_polys.shape[0], 3, 2), dtype=dtype)
    dst_pts[:, 1, 0] = crop_widths - 1
    dst_pts[:, 2, 0] = crop_widths - 1
    dst_pts[:, 2, 1] = crop_heights - 1

    # The warp is applied to a channels-last view of the image
    hwc_img = img if channels_last else img.transpose(1, 2, 0)
    crops = []
    for src, dst, crop_w, crop_h in zip(src_pts, dst_pts, crop_widths, crop_heights):
        transform = cv2.getAffineTransform(src, dst)
        crops.append(cv2.warpAffine(hwc_img, transform, (int(crop_w), int(crop_h))))
    return crops