#!/usr/bin/env python3 # Copyright (c) Megvii Inc. All rights reserved. import numpy as np import torch import torchvision __all__ = [ "filter_box", "postprocess", "bboxes_iou", "matrix_iou", "adjust_box_anns", "xyxy2xywh", "xyxy2cxcywh", "cxcywh2xyxy", ] def filter_box(output, scale_range): """ output: (N, 5+class) shape """ min_scale, max_scale = scale_range w = output[:, 2] - output[:, 0] h = output[:, 3] - output[:, 1] keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale) return output[keep] def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45): # TODO: 矢印のみの推論を弾くような処理が必要 box_corner = prediction.new(prediction.shape) # xの中心座標から左上の座標、右下の座標に変換 # 左上 box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2 box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2 # 右下 box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2 box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2 # 予測結果の座標を変換して、box_cornerに格納 prediction[:, :, :4] = box_corner[:, :, :4] output = [None for _ in range(len(prediction))] for i, image_pred in enumerate(prediction): # If none are remaining => process next image if not image_pred.size(0): continue # Get score and class with highest confidence # 1. 閾値を超える各クラスの確信度に対するマスクを取得 conf_mask_multi = (image_pred[:, 5:5 + num_classes] * image_pred[:, 4].unsqueeze(-1)) >= conf_thre # 2. マスクを使用して、対応するクラスの確信度とクラスインデックスを取得 class_conf_multi = image_pred[:, 5:5 + num_classes][conf_mask_multi] class_idx_multi = (conf_mask_multi.nonzero(as_tuple=True)[1]).float().unsqueeze(-1) # 3. detections_multiテンソルを作成 detections_multi = torch.cat(( image_pred[:, :5].repeat_interleave(torch.sum(conf_mask_multi, dim=1), dim=0), class_conf_multi.unsqueeze(-1), class_idx_multi ), 1) # 4. NMSを実行 ここではクラスごとに実行する(red と arrowが混在している場合に対応するため) multi_nm_out_index = torchvision.ops.batched_nms( detections_multi[:, :4], detections_multi[:, 4] * detections_multi[:, 5], detections_multi[:, 6], nms_thre, ) detections_multi = detections_multi[multi_nm_out_index] if output[i] is None: output[i] = detections_multi else: output[i] = torch.cat((output[i], detections_multi)) return output def bboxes_iou(bboxes_a, bboxes_b, xyxy=True): if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4: raise IndexError if xyxy: tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2]) br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:]) area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1) area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1) else: tl = torch.max( (bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2), (bboxes_b[:, :2] - bboxes_b[:, 2:] / 2), ) br = torch.min( (bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2), (bboxes_b[:, :2] + bboxes_b[:, 2:] / 2), ) area_a = torch.prod(bboxes_a[:, 2:], 1) area_b = torch.prod(bboxes_b[:, 2:], 1) en = (tl < br).type(tl.type()).prod(dim=2) area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all()) return area_i / (area_a[:, None] + area_b - area_i) def matrix_iou(a, b): """ return iou of a and b, numpy version for data augenmentation """ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2]) rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:]) area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2) area_a = np.prod(a[:, 2:] - a[:, :2], axis=1) area_b = np.prod(b[:, 2:] - b[:, :2], axis=1) return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12) def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max): bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max) bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max) return bbox def xyxy2xywh(bboxes): bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] return bboxes def xyxy2cxcywh(bboxes): bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0] bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1] bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5 bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5 return bboxes def cxcywh2xyxy(bboxes): bboxes[:, 0] = bboxes[:, 0] - bboxes[:, 2] * 0.5 bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5 bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2] bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3] return bboxes