# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import math

import torch


class BoxCoder(object):
    """
    This class encodes and decodes a set of bounding boxes into the representation used for training the regressors.
    """

    def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)):
        """
        Arguments:
            weights (4-element tuple)
            bbox_xform_clip (float)
        """
        self.weights = weights
        self.bbox_xform_clip = bbox_xform_clip

    def encode(self, reference_boxes, proposals):
        """
        Encode a set of proposals with respect to some
        reference boxes

        Arguments:
            reference_boxes (Tensor): reference boxes
            proposals (Tensor): boxes to be encoded
        """
        TO_REMOVE = 1  # TODO remove
        ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE
        ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE
        ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths
        ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights

        gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE
        gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE
        gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths
        gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights

        wx, wy, ww, wh = self.weights
        targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths
        targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights
        targets_dw = ww * torch.log(gt_widths / ex_widths)
        targets_dh = wh * torch.log(gt_heights / ex_heights)

        targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1)
        return targets

    def encode_iou(self, reference_boxes, proposals):
        """
        Identical to encode(); kept as a separate entry point.
        """
        return self.encode(reference_boxes, proposals)

    def decode(self, rel_codes, boxes):
        """
        From a set of original boxes and encoded relative box offsets,
        get the decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boxes   # predict  [2, 12000, 4]
            boxes (Tensor): reference boxes.   # anchor  [2, 12000, 4]  xmin0 ymin1 xmax2 ymax3
        """
        boxes = boxes.to(rel_codes.dtype)

        TO_REMOVE = 1  # TODO remove
        widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE
        heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE
        ctr_x = boxes[:, 0] + 0.5 * widths
        ctr_y = boxes[:, 1] + 0.5 * heights

        wx, wy, ww, wh = self.weights
        dx = rel_codes[:, 0::4] / wx
        dy = rel_codes[:, 1::4] / wy
        dw = rel_codes[:, 2::4] / ww
        dh = rel_codes[:, 3::4] / wh

        # Prevent sending too large values into torch.exp() below.
        dw = torch.clamp(dw, max=self.bbox_xform_clip)
        dh = torch.clamp(dh, max=self.bbox_xform_clip)

        pred_ctr_x = dx * widths[:, None] + ctr_x[:, None]
        pred_ctr_y = dy * heights[:, None] + ctr_y[:, None]
        pred_w = torch.exp(dw) * widths[:, None]
        pred_h = torch.exp(dh) * heights[:, None]

        # Convert (center, width, height) back to corner coordinates; the
        # "- 1" on the max corners mirrors the TO_REMOVE convention above.
        pred_boxes = torch.zeros_like(rel_codes)
        pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
        pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
        pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
        pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1

        return pred_boxes

    def decode_iou(self, rel_codes, boxes, num_p=8):
        """
        From a set of reference boxes and encoded boundary-point offsets,
        get the decoded boxes.

        Arguments:
            rel_codes (Tensor): encoded boundary points, [N, 2 * num_p]
            boxes (Tensor): reference boxes (anchors), [N, 4] in
                (xmin, ymin, xmax, ymax) order
            num_p (int): number of boundary points; only 8 is supported
        """
        boxes = boxes.to(rel_codes.dtype)

        TO_REMOVE = 1  # TODO remove
        widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE
        heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE

        ctr_x = boxes[:, 0] + 0.5 * widths
        ctr_y = boxes[:, 1] + 0.5 * heights
        # Eight boundary points around the anchor, numbered clockwise from
        # the top-left corner (four corners plus four edge midpoints):
        # 123
        # 8#4
        # 765
        if num_p == 8:  # 8 boundary points
            x_1 = boxes[:, 0] + widths * rel_codes[:, 0]
            y_1 = boxes[:, 1] + heights * rel_codes[:, 1]
            x_2 = ctr_x + widths * rel_codes[:, 2]
            y_2 = boxes[:, 1] + heights * rel_codes[:, 3]
            x_3 = boxes[:, 2] + widths * rel_codes[:, 4]
            y_3 = boxes[:, 1] + heights * rel_codes[:, 5]
            x_4 = boxes[:, 2] + widths * rel_codes[:, 6]
            y_4 = ctr_y + heights * rel_codes[:, 7]
            x_5 = boxes[:, 2] + widths * rel_codes[:, 8]
            y_5 = boxes[:, 3] + heights * rel_codes[:, 9]
            x_6 = ctr_x + widths * rel_codes[:, 10]
            y_6 = boxes[:, 3] + heights * rel_codes[:, 11]
            x_7 = boxes[:, 0] + widths * rel_codes[:, 12]
            y_7 = boxes[:, 3] + heights * rel_codes[:, 13]
            x_8 = boxes[:, 0] + widths * rel_codes[:, 14]
            y_8 = ctr_y + heights * rel_codes[:, 15]
            x_total = torch.stack([x_1, x_2, x_3, x_4, x_5, x_6, x_7, x_8], 0)
            y_total = torch.stack([y_1, y_2, y_3, y_4, y_5, y_6, y_7, y_8], 0)
        else:
            raise ValueError(
                "decode_iou only supports num_p == 8, got {}".format(num_p))

        # Reduce the eight predicted points to their enclosing
        # axis-aligned box.
        x_min = torch.min(x_total, dim=0)[0]  # [N]
        x_max = torch.max(x_total, dim=0)[0]
        y_min = torch.min(y_total, dim=0)[0]
        y_max = torch.max(y_total, dim=0)[0]

        # Ensure the decoded box always contains the anchor center.
        x_min = torch.min(x_min, ctr_x)
        x_max = torch.max(x_max, ctr_x)
        y_min = torch.min(y_min, ctr_y)
        y_max = torch.max(y_max, ctr_y)

        pred_boxes = torch.zeros_like(boxes)
        pred_boxes[:, 0] = x_min
        pred_boxes[:, 1] = y_min
        pred_boxes[:, 2] = x_max
        pred_boxes[:, 3] = y_max

        return pred_boxes
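

if __name__ == "__main__":
    # Minimal usage sketch. The weights (10., 10., 5., 5.) are the
    # conventional Faster R-CNN defaults, assumed here for illustration
    # only; real configs supply their own values.
    coder = BoxCoder(weights=(10., 10., 5., 5.))

    anchors = torch.tensor([[10., 10., 50., 60.],
                            [20., 30., 80., 90.]])
    gt = torch.tensor([[12., 8., 48., 64.],
                       [25., 28., 70., 95.]])

    # decode() inverts encode(), so decoding the targets against the same
    # anchors should recover the ground-truth boxes (up to float error).
    targets = coder.encode(gt, anchors)
    recovered = coder.decode(targets, anchors)
    print("max round-trip error:", (recovered - gt).abs().max().item())

    # decode_iou() consumes 16 channels (an x/y offset per boundary point);
    # all-zero offsets place the points on the anchor's corners and edge
    # midpoints, so the decoded box coincides with the anchor itself.
    print(coder.decode_iou(torch.zeros(2, 16), anchors))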