File size: 5,269 Bytes
32b542e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import itertools
import numpy as np
import torch
# from torch.nn.utils.rnn import pad_sequence

def pad_sequence(sequences, batch_first=False, padding_value=0.0, padding_length=None):
    """Stack a list of variable-length tensors into one padded tensor.

    Adapted from ``torch.nn.utils.rnn.pad_sequence``, with an extra
    ``padding_length`` argument that forces the padded dimension to a fixed
    size instead of the batch maximum.

    Args:
        sequences: list of tensors; all must share trailing dims and dtype
            (only ``sequences[0]`` is inspected — same assumption as the
            torch original).
        batch_first: if True the output is (batch, length, ...), otherwise
            (length, batch, ...).
        padding_value: fill value for the padded positions.
        padding_length: fixed padded length; when None, the longest sequence
            in the batch determines it.

    Returns:
        A single tensor containing all sequences, padded with
        ``padding_value``.
    """
    first = sequences[0]
    trailing_dims = first.size()[1:]
    if padding_length is None:
        target_len = max(seq.size(0) for seq in sequences)
    else:
        target_len = padding_length

    if batch_first:
        out_dims = (len(sequences), target_len) + trailing_dims
    else:
        out_dims = (target_len, len(sequences)) + trailing_dims

    padded = first.new_full(out_dims, padding_value)
    for idx, seq in enumerate(sequences):
        seq_len = seq.size(0)
        # Slice assignment copies the data; no aliasing back to `seq`.
        if batch_first:
            padded[idx, :seq_len, ...] = seq
        else:
            padded[:seq_len, idx, ...] = seq

    return padded


def pad_tensor(tensor, padding_value, use_mask, padding_length=None):
    """Pad a (possibly one-level-nested) list of tensors batch-first.

    Args:
        tensor: list of 1D+ tensors, or a list of lists of tensors (nested
            lists are flattened one level before padding).
        padding_value: fill value passed through to ``pad_sequence``.
        use_mask: when True, also build and return a 0/1 validity mask.
        padding_length: optional fixed padded length.

    Returns:
        The padded tensor, or ``(padded, mask)`` when ``use_mask`` is True;
        the mask is a ``torch.uint8`` tensor of shape (batch, padded_len).
    """
    if isinstance(tensor[0], list):
        # Flatten one nesting level so every element is a tensor.
        tensor = [item for sub in tensor for item in sub]

    out = pad_sequence(tensor, batch_first=True, padding_value=padding_value, padding_length=padding_length)
    if not use_mask:
        return out

    lengths = [item.size(0) for item in tensor]
    target_len = max(lengths) if padding_length is None else padding_length
    mask = torch.zeros((out.size(0), target_len), dtype=torch.uint8)
    for row, valid in enumerate(lengths):
        mask[row, :valid] = 1
    return out, mask

def dict_to_cuda(input_dict):
    """Move every tensor value in `input_dict` onto the GPU, recursively.

    Mutates `input_dict` in place (tensor values are replaced with their
    CUDA copies; `non_blocking=True` overlaps the transfer with compute
    when the source is in pinned memory) and returns it.

    Bug fix: the function previously had no `return`, so the recursive
    branch `input_dict[key] = dict_to_cuda(...)` replaced every nested
    dict with None. Returning the dict restores the nested values.
    """
    for key in input_dict:
        value = input_dict[key]
        if isinstance(value, torch.Tensor):
            input_dict[key] = value.cuda(non_blocking=True)
        elif isinstance(value, dict):
            input_dict[key] = dict_to_cuda(value)
    return input_dict



def dict_as_tensor(input_data):
    """Recursively convert nested containers' leaf values to tensors.

    Strings, tuples, ints and None pass through untouched; dicts are
    converted in place (and returned); lists are rebuilt; every other
    leaf goes through ``torch.as_tensor``.
    """
    if input_data is None or isinstance(input_data, (str, tuple, int)):
        return input_data
    if isinstance(input_data, dict):
        for key in input_data:
            input_data[key] = dict_as_tensor(input_data[key])
        return input_data
    if isinstance(input_data, list):
        return [dict_as_tensor(item) for item in input_data]
    return torch.as_tensor(input_data)


def boxes_to_locfeats(boxes, image_w, image_h):
    """Convert (N, 4) xyxy boxes into (N, 5) normalized location features.

    The output columns are (x1/W, y1/H, x2/W, y2/H, relative_area), where
    relative_area = box_area / image_area, all as float32.
    """
    width = float(image_w)
    height = float(image_h)

    locfeats = np.zeros((boxes.shape[0], 5), dtype=np.float32)
    locfeats[:, :4] = boxes
    # Area fraction must be computed before the coordinates are normalized.
    locfeats[:, 4] = (
        (locfeats[:, 3] - locfeats[:, 1])
        * (locfeats[:, 2] - locfeats[:, 0])
        / (width * height)
    )

    # Normalize x coordinates by width and y coordinates by height.
    locfeats[:, [0, 2]] /= width
    locfeats[:, [1, 3]] /= height
    return locfeats

def expand_tensor(tensor, size, dim=1):
    """Repeat `tensor` `size` times along a new axis at `dim`, then fold
    that axis into its neighbor.

    E.g. for a (B, D) tensor with dim=1, the result is (B*size, D) with
    each row repeated `size` times consecutively. Returns the input
    unchanged when `size == 1` or `tensor is None`.
    """
    if tensor is None or size == 1:
        return tensor

    expanded = tensor.unsqueeze(dim)
    # Shape after unsqueeze; `expand` only alters index `dim`, so slices
    # excluding `dim` stay valid for the final reshape.
    shape = list(expanded.shape)
    if dim == 0:
        expanded = expanded.expand([size, -1] + shape[2:])
        return expanded.reshape([-1] + shape[2:])
    expanded = expanded.expand(shape[:dim] + [size] + shape[dim + 1:])
    return expanded.reshape(shape[:dim - 1] + [-1] + shape[dim + 1:])

def iou(anchors, gt_boxes):
    """Pairwise IoU between two sets of boxes.

    anchors: (N, 4) ndarray of float, xyxy
    gt_boxes: (K, 4) ndarray of float, xyxy
    Returns an (N, K) ndarray of overlaps; widths and heights use the
    inclusive "+1" pixel convention.
    """
    n = anchors.shape[0]
    k = gt_boxes.shape[0]

    anchor_area = (
        (anchors[:, 2] - anchors[:, 0] + 1) * (anchors[:, 3] - anchors[:, 1] + 1)
    ).reshape(n, 1)
    gt_area = (
        (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
    ).reshape(1, k)

    # Broadcast both sets to (N, K, 4) so intersections are elementwise.
    a = np.repeat(anchors.reshape(n, 1, 4), k, axis=1)
    b = np.repeat(gt_boxes.reshape(1, k, 4), n, axis=0)

    inter_w = (
        np.minimum(a[:, :, 2], b[:, :, 2]) - np.maximum(a[:, :, 0], b[:, :, 0]) + 1
    ).clip(min=0)
    inter_h = (
        np.minimum(a[:, :, 3], b[:, :, 3]) - np.maximum(a[:, :, 1], b[:, :, 1]) + 1
    ).clip(min=0)

    intersection = inter_w * inter_h
    union = anchor_area + gt_area - intersection
    return intersection / union


def get_max_len_from_mask(mask):
    """Return the longest valid length in a (batch, seq) 0/1 mask."""
    per_row_lengths = mask.sum(dim=1)
    return int(per_row_lengths.max())


def clip_v_inputs(v_feats, spatials, image_mask):
    """Trim visual features, spatials and mask along dim 1 to the longest
    real (unmasked) sequence in the batch."""
    keep = get_max_len_from_mask(image_mask)
    return v_feats[:, :keep], spatials[:, :keep], image_mask[:, :keep]


def clip_t_inputs(input_txt, segment_ids, input_mask):
    """Trim text tokens, segment ids and mask along dim 1 to the longest
    real (unmasked) sequence in the batch."""
    keep = get_max_len_from_mask(input_mask)
    return input_txt[:, :keep], segment_ids[:, :keep], input_mask[:, :keep]