Spaces:
Sleeping
Sleeping
File size: 13,446 Bytes
9bf4bd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 |
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple
import numpy as np
from shapely.geometry import LineString, Point
from mmocr.utils.check_argument import is_type_list
from mmocr.utils.point_utils import point_distance, points_center
from mmocr.utils.typing_utils import ArrayLike
def rescale_bbox(bbox: np.ndarray,
                 scale_factor: Tuple[int, int],
                 mode: str = 'mul') -> np.ndarray:
    """Scale the coordinates of a bounding box by ``scale_factor``.

    In 'mul' mode every (x, y) pair is multiplied by (w_scale, h_scale),
    which is what preprocessing transforms such as :func:`Resize` need. In
    'div' mode the pairs are divided by it instead, which lets
    postprocessors map a bbox back to the original image size.

    Args:
        bbox (ndarray): Coordinates stored as consecutive (x, y) values,
            e.g. a bounding box [x1, y1, x2, y2].
        scale_factor (tuple(int, int)): (w_scale, h_scale).
        mode (str): Rescale mode. Can be 'mul' or 'div'. Defaults to 'mul'.

    Returns:
        np.ndarray: Rescaled coordinates, in the same shape as the input.
    """
    assert mode in ['mul', 'div']
    coords = np.array(bbox, dtype=np.float32)
    # Lay the values out as rows of (x, y) so that a single broadcast
    # multiplication scales both axes at once.
    xy_pairs = coords.reshape(-1, 2)
    factors = np.array(scale_factor, dtype=float)
    # Division is carried out as multiplication by the reciprocal.
    factors = 1 / factors if mode == 'div' else factors
    return (xy_pairs * factors[None]).reshape(coords.shape)
def rescale_bboxes(bboxes: np.ndarray,
                   scale_factor: Tuple[int, int],
                   mode: str = 'mul') -> np.ndarray:
    """Scale the coordinates of several bounding boxes by ``scale_factor``.

    In 'mul' mode the coordinates are multiplied by scale_factor, which is
    what preprocessing transforms such as :func:`Resize` need. In 'div' mode
    they are divided by it instead, which lets postprocessors map bboxes
    back to the original image size.

    Args:
        bboxes (np.ndarray): Bounding boxes in shape (N, 4).
        scale_factor (tuple(int, int)): (w_scale, h_scale).
        mode (str): Rescale mode. Can be 'mul' or 'div'. Defaults to 'mul'.

    Returns:
        np.ndarray: Rescaled bboxes, as returned by :func:`rescale_bbox`.
    """
    # The whole batch is handed to rescale_bbox in one call.
    return rescale_bbox(bboxes, scale_factor, mode)
def bbox2poly(bbox: ArrayLike, mode: str = 'xyxy') -> np.array:
    """Turn an axis-aligned bounding box into a four-corner polygon.

    Args:
        bbox (ArrayLike): A bbox with exactly four entries that can be
            unpacked, e.g. list[float], np.ndarray, or torch.Tensor.
            Interpreted as [x1, y1, x2, y2] in 'xyxy' mode and as
            [x, y, w, h] in 'xywh' mode.
        mode (str): Specify the format of bbox. Can be 'xyxy' or 'xywh'.
            Defaults to 'xyxy'.

    Returns:
        np.array: The converted polygon [x1, y1, x2, y1, x2, y2, x1, y2].

    Raises:
        NotImplementedError: If ``mode`` is neither 'xyxy' nor 'xywh'.
    """
    assert len(bbox) == 4
    if mode not in ('xyxy', 'xywh'):
        raise NotImplementedError('Not supported mode.')

    left, top, third, fourth = bbox
    if mode == 'xywh':
        # The last two entries are width and height: derive the far corner.
        right, bottom = left + third, top + fourth
    else:
        # The last two entries already are the far corner.
        right, bottom = third, fourth
    return np.array([left, top, right, top, right, bottom, left, bottom])
def is_on_same_line(box_a, box_b, min_y_overlap_ratio=0.8):
    # TODO Check if it should be deleted after ocr.py refactored
    """Decide from y coordinates alone whether two boxes share a text line.

    The boxes share a line when their vertical extents intersect and the
    length of that intersection reaches ``min_y_overlap_ratio`` times the
    height of at least one of the two boxes.

    Args:
        box_a (list), box_b (list): Two bounding boxes to be checked. The
            y coordinates are read from the odd positions (``box[1::2]``).
        min_y_overlap_ratio (float): The minimum vertical overlapping ratio
            allowed for boxes in the same line. If None, any vertical
            intersection is sufficient.

    Returns:
        The bool flag indicating if they are on the same line
    """
    upper_top, upper_bottom = np.min(box_a[1::2]), np.max(box_a[1::2])
    lower_top, lower_bottom = np.min(box_b[1::2]), np.max(box_b[1::2])

    # "upper" must name the box whose top edge has the smaller y.
    if upper_top > lower_top:
        upper_top, upper_bottom, lower_top, lower_bottom = (
            lower_top, lower_bottom, upper_top, upper_bottom)

    # The lower box starts below the end of the upper one: no intersection.
    if not lower_top <= upper_bottom:
        return False
    if min_y_overlap_ratio is None:
        return True

    # The intersection starts at the lower box's top and ends at whichever
    # bottom edge comes first.
    overlap = min(lower_bottom, upper_bottom) - lower_top
    upper_needed = (upper_bottom - upper_top) * min_y_overlap_ratio
    lower_needed = (lower_bottom - lower_top) * min_y_overlap_ratio
    return overlap >= upper_needed or overlap >= lower_needed
def stitch_boxes_into_lines(boxes, max_x_dist=10, min_y_overlap_ratio=0.8):
    # TODO Check if it should be deleted after ocr.py refactored
    """Merge fragmented word boxes into text-line boxes.

    Note: part of its logic is inspired by @Johndirr
    (https://github.com/faustomorales/keras-ocr/issues/22)

    Args:
        boxes (list): List of ocr results to be stitched. Each item is
            indexed with the keys 'box' (x values at even positions, y
            values at odd positions) and 'text'.
        max_x_dist (int): The maximum horizontal distance between the closest
            edges of neighboring boxes in the same line
        min_y_overlap_ratio (float): The minimum vertical overlapping ratio
            allowed for any pairs of neighboring boxes in the same line

    Returns:
        merged_boxes(list[dict]): List of merged boxes and texts. When the
        input holds at most one item it is returned unchanged.
    """
    if len(boxes) <= 1:
        return boxes

    # Work from left to right: order the boxes by their smallest x value.
    ordered = sorted(boxes, key=lambda item: np.min(item['box'][::2]))
    num_boxes = len(ordered)
    # Indexes of boxes that have already been absorbed into a line.
    consumed = set()
    stitched = []

    for start in range(num_boxes):
        if start in consumed:
            continue

        # Grow a chain to the right; every candidate is compared with the
        # box that was appended last.
        chain = [start]
        tail = start
        for candidate in range(start + 1, num_boxes):
            if candidate in consumed:
                continue
            if is_on_same_line(ordered[tail]['box'],
                               ordered[candidate]['box'],
                               min_y_overlap_ratio):
                chain.append(candidate)
                consumed.add(candidate)
                tail = candidate

        # Cut the chain wherever the horizontal gap to the running right
        # edge is wider than max_x_dist.
        segments = [[chain[0]]]
        right_edge = np.max(ordered[chain[0]]['box'][::2])
        for member in chain[1:]:
            member_xs = ordered[member]['box'][::2]
            if np.min(member_xs) - right_edge > max_x_dist:
                segments.append([])
            segments[-1].append(member)
            right_edge = max(right_edge, np.max(member_xs))

        # Each segment becomes one merged entry: the texts joined by a
        # space and the axis-aligned hull of the member boxes.
        for segment in segments:
            joined_text = ' '.join(
                [ordered[idx]['text'] for idx in segment])
            x_min = y_min = float('inf')
            x_max = y_max = float('-inf')
            for idx in segment:
                coords = ordered[idx]['box']
                x_max = max(np.max(coords[::2]), x_max)
                x_min = min(np.min(coords[::2]), x_min)
                y_max = max(np.max(coords[1::2]), y_max)
                y_min = min(np.min(coords[1::2]), y_min)
            stitched.append({
                'text': joined_text,
                'box': [
                    x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max
                ],
            })
    return stitched
def bezier2polygon(bezier_points: np.ndarray,
                   num_sample: int = 20) -> List[np.ndarray]:
    # TODO check test later
    """Sample boundary points of the region enclosed by two cubic Bezier
    curves, which are controlled by ``bezier_points``.

    Args:
        bezier_points (ndarray): A :math:`(2, 4, 2)` array of 8 Bezier
            control points or anything equivalent holding 16 values. The
            first 4 points control the curve at one side and the last four
            control the other side.
        num_sample (int): The number of sample points at each Bezier curve.
            Defaults to 20.

    Returns:
        list: ``2 * num_sample`` points as nested ``[x, y]`` lists (the
        output of ``ndarray.tolist()``). The samples of the first curve
        come first, followed by the samples of the second curve.

    Warning:
        The points are not guaranteed to be ordered. Please use
        :func:`mmocr.utils.sort_points` to sort points if necessary.
    """
    assert num_sample > 0, 'The sampling number should greater than 0'
    bezier_points = np.asarray(bezier_points)
    assert np.prod(
        bezier_points.shape) == 16, 'Need 8 Bezier control points to continue!'

    # Rows: x of curve 0, y of curve 0, x of curve 1, y of curve 1.
    # Columns: control point index 0..3.
    control = bezier_points.reshape(2, 4, 2).transpose(0, 2, 1).reshape(4, 4)

    u = np.linspace(0, 1, num_sample)
    # Cubic Bernstein weights, one per control point.
    weights = (
        (1 - u) ** 3,
        3 * u * ((1 - u) ** 2),
        3 * (u ** 2) * (1 - u),
        u ** 3,
    )
    # Accumulate the weighted control points term by term; every row of
    # ``samples`` is (x0, y0, x1, y1) for one parameter value.
    samples = np.outer(weights[0], control[:, 0])
    for k in range(1, 4):
        samples = samples + np.outer(weights[k], control[:, k])

    # Stack the (x, y) columns of the first curve above those of the second.
    polygon = np.concatenate((samples[:, :2], samples[:, 2:]), axis=0)
    return polygon.tolist()
def sort_vertex(points_x, points_y):
    # TODO Add typehints & docstring & test
    """Reorder the four vertices of a box, given as separate x and y lists.

    The ordering itself is delegated to :func:`_sort_vertex`; the intent is
    clockwise order starting from the left-top vertex.

    Args:
        points_x (list[float]): x of four vertices.
        points_y (list[float]): y of four vertices.

    Returns:
        sorted_points_x (list[float]): x of sorted four vertices.
        sorted_points_y (list[float]): y of sorted four vertices.
    """
    assert is_type_list(points_x, (float, int)) and \
        is_type_list(points_y, (float, int))
    assert len(points_x) == 4 and len(points_y) == 4

    # Pair the coordinates up as a (4, 2) float32 array of (x, y) rows.
    quad = np.stack((points_x, points_y), axis=-1).astype(np.float32)
    quad = _sort_vertex(quad)
    return list(quad[:, 0]), list(quad[:, 1])
def _sort_vertex(vertices):
# TODO Add typehints & docstring & test
assert vertices.ndim == 2
assert vertices.shape[-1] == 2
N = vertices.shape[0]
if N == 0:
return vertices
center = np.mean(vertices, axis=0)
directions = vertices - center
angles = np.arctan2(directions[:, 1], directions[:, 0])
sort_idx = np.argsort(angles)
vertices = vertices[sort_idx]
left_top = np.min(vertices, axis=0)
dists = np.linalg.norm(left_top - vertices, axis=-1, ord=2)
lefttop_idx = np.argmin(dists)
indexes = (np.arange(N, dtype=np.int_) + lefttop_idx) % N
return vertices[indexes]
def sort_vertex8(points):
    # TODO Add typehints & docstring & test
    """Sort vertex with 8 points [x1 y1 x2 y2 x3 y3 x4 y4].

    Args:
        points: Eight values, read as four consecutive (x, y) pairs.

    Returns:
        list: The eight coordinates after the pairs have been reordered by
        :func:`_sort_vertex`, flattened back into one list.
    """
    assert len(points) == 8
    # Regroup the flat sequence into (x, y) rows before sorting.
    quad = np.array(points, dtype=np.float32).reshape(-1, 2)
    return list(_sort_vertex(quad).flatten())
def bbox_center_distance(box1: ArrayLike, box2: ArrayLike) -> float:
    """Measure how far apart the centers of two bounding boxes are.

    Each center is obtained with ``points_center`` and the two centers are
    then compared with ``point_distance``.

    Args:
        box1 (ArrayLike): The first bounding box
            represented in [x1, y1, x2, y2].
        box2 (ArrayLike): The second bounding box
            represented in [x1, y1, x2, y2].

    Returns:
        float: The distance between the center points of two bounding boxes.
    """
    first_center = points_center(box1)
    second_center = points_center(box2)
    return point_distance(first_center, second_center)
def bbox_diag_distance(box: ArrayLike) -> float:
    """Calculate the diagonal length of a bounding box (distance between the
    top-left and bottom-right).

    Args:
        box (ArrayLike): The bounding box represented in
            [x1, y1, x2, y2, x3, y3, x4, y4] or [x1, y1, x2, y2]. Nested
            layouts holding the same values in the same order, such as
            shape (4, 2) or (2, 2), are accepted as well.

    Returns:
        float: The diagonal length of the bounding box, as computed by
        ``point_distance`` on the first point and the opposite one.

    Raises:
        AssertionError: If ``box`` does not hold exactly 4 or 8 values.
    """
    # Flatten first: the size check below admits nested inputs, and on
    # those a plain 1-D slice would select rows rather than coordinates.
    coords = np.array(box, dtype=np.float32).reshape(-1)
    assert coords.size in (4, 8)

    if coords.size == 8:
        # Four-point polygon: the first and third points are opposite.
        return point_distance(coords[0:2], coords[4:6])
    # Two-point box: (x1, y1) against (x2, y2).
    return point_distance(coords[0:2], coords[2:4])
def bbox_jitter(points_x, points_y, jitter_ratio_x=0.5, jitter_ratio_y=0.1):
    """Randomly shift the four vertices of a bounding box in place.

    Every vertex receives an independent uniform offset whose magnitude is
    bounded by ``jitter_ratio * h``, where ``h`` is the longer of the second
    and fourth edges of the quadrilateral (presumably the box height, i.e.
    the vertices are expected to start at the top-left corner - TODO
    confirm against callers). Nothing is returned; ``points_x`` and
    ``points_y`` are modified directly.

    Args:
        points_x (list[float | int]): List of x for four vertices.
        points_y (list[float | int]): List of y for four vertices.
        jitter_ratio_x (float): Horizontal jitter ratio relative to the height.
        jitter_ratio_y (float): Vertical jitter ratio relative to the height.
    """
    assert len(points_x) == 4 and len(points_y) == 4
    assert isinstance(jitter_ratio_x, float) and \
        isinstance(jitter_ratio_y, float)
    assert 0 <= jitter_ratio_x < 1
    assert 0 <= jitter_ratio_y < 1

    corners = [Point(points_x[idx], points_y[idx]) for idx in range(4)]
    # Edge k joins corner k with corner (k + 1) mod 4, closing the loop.
    edges = [
        LineString([corners[k], corners[(k + 1) % 4]]) for k in range(4)
    ]
    ref_height = max(edges[1].length, edges[3].length)

    for idx in range(4):
        # rand() is in [0, 1); recentre and double it to cover [-1, 1).
        # The x offset is always drawn before the y offset.
        shift_x = (np.random.rand() - 0.5) * 2 * jitter_ratio_x * ref_height
        shift_y = (np.random.rand() - 0.5) * 2 * jitter_ratio_y * ref_height
        points_x[idx] += shift_x
        points_y[idx] += shift_y
|