File size: 6,969 Bytes
153628e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Copyright (C) 2021-2024, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

from typing import Tuple, Union

import cv2
import numpy as np

from doctr.utils.geometry import rotate_abs_geoms

__all__ = ["crop_boxes", "create_shadow_mask"]


def crop_boxes(
    boxes: np.ndarray,
    crop_box: Union[Tuple[int, int, int, int], Tuple[float, float, float, float]],
) -> np.ndarray:
    """Crop localization boxes

    Boxes are clipped to the crop window, shifted so that the window's top-left corner becomes the origin
    and, for relative coordinates, rescaled by the window size. Boxes left without any area are dropped.
    The input array is never modified.

    Args:
    ----
        boxes: ndarray of shape (N, 4) in relative or abs coordinates
        crop_box: box (xmin, ymin, xmax, ymax) to crop the image, in the same coord format that the boxes

    Returns:
    -------
        the cropped boxes, as a new ndarray holding only the boxes that still have a positive area

    Raises:
    ------
        AssertionError: if the boxes and the crop box do not share the same coordinate convention
    """
    # Nothing to crop, and `max()` is undefined on a zero-size array
    if boxes.size == 0:
        return boxes.copy()

    # Coordinates are considered relative when none of them exceeds 1
    is_box_rel = boxes.max() <= 1
    is_crop_rel = max(crop_box) <= 1

    if is_box_rel ^ is_crop_rel:
        raise AssertionError("both the boxes and the crop need to have the same coordinate convention")

    xmin, ymin, xmax, ymax = crop_box
    # Work on a copy so that the caller's array is left untouched
    cropped = boxes.copy()
    # Clip boxes & correct offset
    cropped[:, [0, 2]] = cropped[:, [0, 2]].clip(xmin, xmax) - xmin
    cropped[:, [1, 3]] = cropped[:, [1, 3]].clip(ymin, ymax) - ymin
    # Rescale relative coords
    if is_box_rel:
        cropped[:, [0, 2]] /= xmax - xmin
        cropped[:, [1, 3]] /= ymax - ymin

    # Remove 0-sized boxes
    is_valid = np.logical_and(cropped[:, 1] < cropped[:, 3], cropped[:, 0] < cropped[:, 2])

    return cropped[is_valid]


def expand_line(line: np.ndarray, target_shape: Tuple[int, int]) -> Tuple[float, float]:
    """Push the first point of a 2-point line back until it lies on an image edge.

    The segment is prolonged from the first point, away from the second one (the shadow tip), until it
    crosses one of the borders of an image of shape `target_shape`.

    Args:
    ----
        line: array of shape (2, 2) holding the (x, y) point expected on an edge, then the shadow tip.
        target_shape: the desired mask shape (H, W)

    Returns:
    -------
        2D coordinates (x, y) of the first point after extension; the first point itself is returned
        unchanged when it already touches an edge

    Raises:
    ------
        ValueError: if no border intersection lies on the expected side of the tip
    """
    start, tip = line[0], line[1]
    # Image extent expressed as (W, H) to line up with (x, y) points
    limits = target_shape[::-1]

    # Already on a border: nothing to extend
    for coord, size in zip(start, limits):
        if coord == 0 or coord == size:
            return start

    delta = tip - start
    heading_positive = delta > 0
    unchanged = delta == 0

    if delta[0] == 0:
        # Vertical line: only the top (y = 0) and bottom (y = H) borders can be hit
        candidates = [
            (line[0, 0], 0),
            (line[0, 0], target_shape[0]),
        ]
    elif delta[1] == 0:
        # Horizontal line: only the left (x = 0) and right (x = W) borders can be hit
        candidates = [
            (0, line[0, 1]),
            (target_shape[1], line[0, 1]),
        ]
    else:
        # Generic line y = slope * x + intercept, intersected with the 4 borders
        slope = delta[1] / delta[0]
        intercept = tip[1] - slope * tip[0]
        candidates = [
            (0, intercept),  # x = 0
            (-intercept / slope, 0),  # y = 0
            (target_shape[1], slope * target_shape[1] + intercept),  # x = W
            ((target_shape[0] - intercept) / slope, target_shape[0]),  # y = H
        ]

    for candidate in candidates:
        # Discard intersections falling outside of the image
        outside = False
        for val, size in zip(candidate, limits):
            if val < 0 or val > size:
                outside = True
                break
        if outside:
            continue

        # Keep the intersection located on the opposite side of the tip, coordinate by coordinate
        consistent = True
        for val, ref, forward, same in zip(candidate, tip, heading_positive, unchanged):
            if same:
                matches = val == ref
            elif forward:
                matches = val < ref
            else:
                matches = val > ref
            if not matches:
                consistent = False
                break
        if consistent:
            return candidate
    raise ValueError


def create_shadow_mask(
    target_shape: Tuple[int, int],
    min_base_width: float = 0.3,
    max_tip_width: float = 0.5,
    max_tip_height: float = 0.3,
) -> np.ndarray:
    """Creates a random shadow mask

    A random quadrilateral (a wide base and a narrower tip) is sampled in relative coordinates, rotated by a
    random angle, has its base extended to the image borders, and is then rasterized and multiplied by a
    linear intensity ramp. Randomness comes from the global NumPy random state (`np.random.rand`).

    Args:
    ----
        target_shape: the target shape (H, W)
        min_base_width: the relative minimum shadow base width
        max_tip_width: the relative maximum shadow tip width
        max_tip_height: the relative maximum shadow tip height

    Returns:
    -------
        a float32 numpy ndarray of shape (H, W) with values in the range [0, 1] (the channel axis used for
        drawing is dropped before returning)
    """
    # Default base is top
    # Six uniform samples drive, in order: base width, base center, tip width, tip center, tip height, tip middle
    _params = np.random.rand(6)
    # Base width lies in [min_base_width, 1]; its center is picked so that the base stays within [0, 1]
    base_width = min_base_width + (1 - min_base_width) * _params[0]
    base_center = base_width / 2 + (1 - base_width) * _params[1]
    # Ensure tip width is smaller for shadow consistency
    tip_width = min(_params[2] * base_width * target_shape[0] / target_shape[1], max_tip_width)
    tip_center = tip_width / 2 + (1 - tip_width) * _params[3]
    tip_height = _params[4] * max_tip_height
    tip_mid = tip_height / 2 + (1 - tip_height) * _params[5]
    # Whether the tip sits left of the base; selects which tip corner receives the larger y below
    _order = tip_center < base_center
    # Relative (x, y) corners: the two base points on y = 0, then the two tip points
    contour: np.ndarray = np.array(
        [
            [base_center - base_width / 2, 0],
            [base_center + base_width / 2, 0],
            [tip_center + tip_width / 2, tip_mid + tip_height / 2 if _order else tip_mid - tip_height / 2],
            [tip_center - tip_width / 2, tip_mid - tip_height / 2 if _order else tip_mid + tip_height / 2],
        ],
        dtype=np.float32,
    )

    # Convert to absolute coords
    # x is scaled by the width (target_shape[1]), y by the height (target_shape[0])
    abs_contour: np.ndarray = (
        np.stack(
            (contour[:, 0] * target_shape[1], contour[:, 1] * target_shape[0]),
            axis=-1,
        )
        .round()
        .astype(np.int32)
    )

    # Direction
    # A single uniform sample is mapped to an angle in [0, 360)
    _params = np.random.rand(1)
    # NOTE(review): rotation center/direction are defined by `rotate_abs_geoms` (not visible here) - confirm
    rotated_contour = (
        rotate_abs_geoms(
            abs_contour[None, ...],
            360 * _params[0],
            target_shape,
            expand=False,
        )[0]
        .round()
        .astype(np.int32)
    )
    # Check approx quadrant
    # Index in {0, 1, 2, 3}: which quarter of the [0, 360) range the angle falls in
    quad_idx = int(_params[0] / 0.25)
    # Top-bot
    # NOTE(review): a target dimension of 1 makes the ramp denominator below equal to zero
    if quad_idx % 2 == 0:
        # Ramp going from 0 on the first row to 1 on the last row, flipped for the first quadrant
        intensity_mask = np.repeat(np.arange(target_shape[0])[:, None], target_shape[1], axis=1) / (target_shape[0] - 1)
        if quad_idx == 0:
            intensity_mask = 1 - intensity_mask
    # Left - right
    else:
        # Ramp going from 0 on the first column to 1 on the last column, flipped for the second quadrant
        intensity_mask = np.repeat(np.arange(target_shape[1])[None, :], target_shape[0], axis=0) / (target_shape[1] - 1)
        if quad_idx == 1:
            intensity_mask = 1 - intensity_mask

    # Expand base
    # Each base point is replaced by the border intersection of the line joining it to its tip point;
    # the contour is int32, so fractional intersection coordinates are cast on assignment
    final_contour = rotated_contour.copy()
    final_contour[0] = expand_line(final_contour[[0, 3]], target_shape)
    final_contour[1] = expand_line(final_contour[[1, 2]], target_shape)
    # If both base are not on the same side, add a point
    # (the two base points share neither their x nor their y, so an image corner is inserted between them)
    if not np.any(final_contour[0] == final_contour[1]):
        corner_x = 0 if max(final_contour[0, 0], final_contour[1, 0]) < target_shape[1] else target_shape[1]
        corner_y = 0 if max(final_contour[0, 1], final_contour[1, 1]) < target_shape[0] else target_shape[0]
        corner: np.ndarray = np.array([corner_x, corner_y])
        final_contour = np.concatenate((final_contour[:1], corner[None, ...], final_contour[1:]), axis=0)

    # Direction & rotate
    # Rasterize the polygon with value 255 on a single-channel canvas, then drop the channel axis
    mask: np.ndarray = np.zeros((*target_shape, 1), dtype=np.uint8)
    mask = cv2.fillPoly(mask, [final_contour], (255,), lineType=cv2.LINE_AA)[..., 0]

    # Normalize the polygon mask to [0, 1] and modulate it with the intensity ramp
    return (mask / 255).astype(np.float32).clip(0, 1) * intensity_mask.astype(np.float32)  # type: ignore[operator]