"""Helpers for a YOLO-style detector: paths, parameter counts, decoding, NMS."""
from __future__ import division

import os.path as osp

import cv2
import numpy as np
import torch

from bbox import bbox_iou


def get_path(cur_file):
    """Derive the project directories from the location of *cur_file*.

    Args:
        cur_file: Path of a file inside the project (typically ``__file__``).

    Returns:
        Tuple ``(project_root, chk_root, data_root, cur_dir)``. ``cur_dir`` is
        the resolved directory of *cur_file*; ``project_root`` is ``cur_dir``
        joined with ``'../../../'`` (the path is not normalised); ``chk_root``
        and ``data_root`` are its ``checkpoint/`` and ``data/`` children.
    """
    cur_dir = osp.dirname(osp.realpath(cur_file))
    project_root = osp.join(cur_dir, '../../../')
    chk_root = osp.join(project_root, 'checkpoint/')
    data_root = osp.join(project_root, 'data/')
    return project_root, chk_root, data_root, cur_dir


def count_parameters(model):
    """Return the total number of elements over ``model.parameters()``."""
    return sum(p.numel() for p in model.parameters())


def count_learnable_parameters(model):
    """Return the number of parameter elements that have ``requires_grad`` set."""
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def convert2cpu(matrix):
    """Return *matrix* as a CPU tensor.

    CUDA tensors are copied into a freshly allocated ``torch.FloatTensor`` of
    the same size; tensors that are not on CUDA are returned unchanged (same
    object, original dtype).
    """
    if matrix.is_cuda:
        return torch.FloatTensor(matrix.size()).copy_(matrix)
    return matrix


def predict_transform(prediction, inp_dim, anchors, num_classes, CUDA=True):
    """Decode one raw detection feature map into per-box attributes.

    Args:
        prediction: Raw network output. Assumed to be laid out as
            ``(batch, num_anchors * (5 + num_classes), grid, grid)``; the code
            reads ``size(0)`` and ``size(2)`` and reshapes accordingly.
        inp_dim: Network input size; the stride is ``inp_dim // grid``.
        anchors: Sequence of ``(a0, a1)`` anchor sizes expressed in input
            pixels; they are divided by the stride before use.
        num_classes: Number of class scores per box.
        CUDA: When true, the grid offsets and anchors are moved to the GPU
            with ``.cuda()`` before being combined with *prediction*.

    Returns:
        Tensor of shape ``(batch, grid * grid * num_anchors, 5 + num_classes)``
        whose columns are the two centre coordinates, the two box sizes
        (all four multiplied by the stride), the objectness score and the
        per-class scores.
    """
    batch_size = prediction.size(0)
    stride = inp_dim // prediction.size(2)
    grid_size = inp_dim // stride
    bbox_attrs = 5 + num_classes
    num_anchors = len(anchors)

    # Express the anchors in grid-cell units.
    anchors = [(a[0] / stride, a[1] / stride) for a in anchors]

    # Flatten the grid and move the attributes to the last axis:
    # one row per (cell, anchor) pair.
    prediction = prediction.view(
        batch_size, bbox_attrs * num_anchors, grid_size * grid_size)
    prediction = prediction.transpose(1, 2).contiguous()
    prediction = prediction.view(
        batch_size, grid_size * grid_size * num_anchors, bbox_attrs)

    # Sigmoid the centre x, centre y and the object confidence.
    prediction[:, :, 0] = torch.sigmoid(prediction[:, :, 0])
    prediction[:, :, 1] = torch.sigmoid(prediction[:, :, 1])
    prediction[:, :, 4] = torch.sigmoid(prediction[:, :, 4])

    # Add the per-cell centre offsets.
    grid_len = np.arange(grid_size)
    grid_x, grid_y = np.meshgrid(grid_len, grid_len)

    x_offset = torch.FloatTensor(grid_x).view(-1, 1)
    y_offset = torch.FloatTensor(grid_y).view(-1, 1)

    if CUDA:
        x_offset = x_offset.cuda()
        y_offset = y_offset.cuda()

    # Repeat every cell offset once per anchor so it lines up with the rows.
    x_y_offset = torch.cat((x_offset, y_offset), 1)
    x_y_offset = x_y_offset.repeat(1, num_anchors).view(-1, 2).unsqueeze(0)

    prediction[:, :, :2] += x_y_offset

    # Log-space transform of the box sizes: exp(raw) * anchor.
    anchors = torch.FloatTensor(anchors)
    if CUDA:
        anchors = anchors.cuda()

    anchors = anchors.repeat(grid_size * grid_size, 1).unsqueeze(0)
    prediction[:, :, 2:4] = torch.exp(prediction[:, :, 2:4]) * anchors

    # Sigmoid (not softmax) the class scores: each class is scored
    # independently.
    prediction[:, :, 5:5 + num_classes] = torch.sigmoid(
        prediction[:, :, 5:5 + num_classes])

    # Scale centres and sizes from grid-cell units back to input pixels.
    prediction[:, :, :4] *= stride

    return prediction


def load_classes(namesfile):
    """Read class names from *namesfile*, one name per line.

    Only a single trailing empty entry (produced by a final newline) is
    removed, so a file without a trailing newline keeps its last class.
    Blank lines elsewhere are preserved so that list positions keep matching
    line numbers.

    Args:
        namesfile: Path of the text file to read.

    Returns:
        List of the lines of the file.
    """
    with open(namesfile, "r") as fp:
        names = fp.read().split("\n")
    if names[-1] == "":
        names.pop()
    return names


def get_im_dim(im):
    """Return ``(width, height)`` of the image stored at path *im*.

    NOTE(review): ``cv2.imread`` is documented to return ``None`` for an
    unreadable path, in which case the ``.shape`` access below raises
    ``AttributeError`` -- confirm callers only pass valid image paths.
    """
    image = cv2.imread(im)
    w, h = image.shape[1], image.shape[0]
    return w, h


def unique(tensor):
    """Return the unique values of *tensor* (via ``np.unique``).

    The result is created with ``tensor.new`` so it has the same type as the
    input tensor.
    """
    tensor_np = tensor.cpu().numpy()
    unique_np = np.unique(tensor_np)
    unique_tensor = torch.from_numpy(unique_np)

    tensor_res = tensor.new(unique_tensor.shape)
    tensor_res.copy_(unique_tensor)
    return tensor_res


# TODO: add soft-NMS as an alternative to the hard NMS below.
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4,
                  det_hm=False):
    """Threshold detections and apply class-wise non-maximum suppression.

    Reference:
    https://blog.paperspace.com/how-to-implement-a-yolo-v3-object-detector-from-scratch-in-pytorch-part-4/

    Args:
        prediction: Tensor of shape ``(B, N, 5 + num_classes)``, e.g.
            ``(B, 10647, 85)`` where ``(52*52 + 26*26 + 13*13) * 3 = 10647``.
            Per box: ``(c_x, c_y, w, h, object confidence, class scores...)``.
            The caller's tensor is not modified.
        confidence: Boxes whose object confidence is not strictly greater
            than this value are discarded.
        num_classes: Number of class-score columns following column 4.
        nms: Whether to run non-maximum suppression within each class.
        nms_conf: IoU threshold; a box is kept only while its IoU with every
            higher-confidence kept box of the same class is below this value.
        det_hm: When true, keep only detections whose class index is 0.

    Returns:
        Tensor of shape ``(num_obj, 8)`` with one row per kept detection:
        ``[img_index, x_1, y_1, x_2, y_2, object confidence, class_score,
        label_index]``. When nothing is detected the result is an empty
        ``(0, 8)`` tensor, so ``len(result) == 0`` identifies that case.
    """
    # Zero out every box at or below the objectness threshold; a zero
    # objectness column is what marks a row as discarded from here on.
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Convert (centre, size) boxes to corner form (x1, y1, x2, y2). The
    # corners are computed into a new tensor before being written back.
    centres = prediction[:, :, :2]
    half_sizes = prediction[:, :, 2:4] / 2
    corners = torch.cat((centres - half_sizes, centres + half_sizes), 2)
    prediction[:, :, :4] = corners

    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        image_pred = prediction[ind]

        # Replace the per-class scores by the best score and its class index.
        max_conf, max_conf_index = torch.max(
            image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_index = max_conf_index.float().unsqueeze(1)
        # Columns: [x1, y1, x2, y2, obj_score, max_conf, max_conf_index]
        image_pred = torch.cat(
            (image_pred[:, :5], max_conf, max_conf_index), 1)

        # Drop the rows that were zeroed by the confidence threshold.
        image_pred_ = image_pred[image_pred[:, 4] != 0]

        if det_hm:
            # Keep class index 0 only.
            image_pred_ = image_pred_[image_pred_[:, -1] == 0]

        # Nothing left for this image: move on to the next one instead of
        # aborting the whole batch.
        if image_pred_.size(0) == 0:
            continue

        img_classes = torch.unique(image_pred_[:, -1], sorted=True).float()

        # NMS is done class by class.
        for cls in img_classes:
            # Rows of this class; rows whose class score is exactly zero are
            # dropped as well (they were indistinguishable from masked rows
            # in the previous mask-and-nonzero selection).
            cls_rows = (image_pred_[:, -1] == cls) & (image_pred_[:, -2] != 0)
            image_pred_class = image_pred_[cls_rows]

            # Highest objectness first.
            conf_sort_index = torch.sort(
                image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]

            if nms:
                for i in range(image_pred_class.size(0)):
                    # IoU of box i against every lower-ranked box. The tensor
                    # shrinks while we iterate, so indexing past the end (or
                    # an empty remainder) ends the loop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0),
                                        image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero the boxes overlapping box i too much, then drop
                    # the zeroed rows.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask
                    image_pred_class = image_pred_class[
                        image_pred_class[:, 4] != 0]

            # Prefix every detection with the index of its image so that the
            # whole batch can be stored in one flat tensor.
            batch_ind = image_pred_class.new_full(
                (image_pred_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return prediction.new_zeros((0, 8))
    return torch.cat(detections)