OpenLenda / yolox /utils /boxes.py
yutyan's picture
Add app
37f5c2f
raw
history blame
5.16 kB
#!/usr/bin/env python3
# Copyright (c) Megvii Inc. All rights reserved.
import numpy as np
import torch
import torchvision
__all__ = [
"filter_box",
"postprocess",
"bboxes_iou",
"matrix_iou",
"adjust_box_anns",
"xyxy2xywh",
"xyxy2cxcywh",
"cxcywh2xyxy",
]
def filter_box(output, scale_range):
"""
output: (N, 5+class) shape
"""
min_scale, max_scale = scale_range
w = output[:, 2] - output[:, 0]
h = output[:, 3] - output[:, 1]
keep = (w * h > min_scale * min_scale) & (w * h < max_scale * max_scale)
return output[keep]
def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
# TODO: 矢印のみの推論を弾くような処理が必要
box_corner = prediction.new(prediction.shape)
# xの中心座標から左上の座標、右下の座標に変換
# 左上
box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
# 右下
box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
# 予測結果の座標を変換して、box_cornerに格納
prediction[:, :, :4] = box_corner[:, :, :4]
output = [None for _ in range(len(prediction))]
for i, image_pred in enumerate(prediction):
# If none are remaining => process next image
if not image_pred.size(0):
continue
# Get score and class with highest confidence
# 1. 閾値を超える各クラスの確信度に対するマスクを取得
conf_mask_multi = (image_pred[:, 5:5 + num_classes] * image_pred[:, 4].unsqueeze(-1)) >= conf_thre
# 2. マスクを使用して、対応するクラスの確信度とクラスインデックスを取得
class_conf_multi = image_pred[:, 5:5 + num_classes][conf_mask_multi]
class_idx_multi = (conf_mask_multi.nonzero(as_tuple=True)[1]).float().unsqueeze(-1)
# 3. detections_multiテンソルを作成
detections_multi = torch.cat((
image_pred[:, :5].repeat_interleave(torch.sum(conf_mask_multi, dim=1), dim=0),
class_conf_multi.unsqueeze(-1),
class_idx_multi
), 1)
# 4. NMSを実行 ここではクラスごとに実行する(red と arrowが混在している場合に対応するため)
multi_nm_out_index = torchvision.ops.batched_nms(
detections_multi[:, :4],
detections_multi[:, 4] * detections_multi[:, 5],
detections_multi[:, 6],
nms_thre,
)
detections_multi = detections_multi[multi_nm_out_index]
if output[i] is None:
output[i] = detections_multi
else:
output[i] = torch.cat((output[i], detections_multi))
return output
def bboxes_iou(bboxes_a, bboxes_b, xyxy=True):
if bboxes_a.shape[1] != 4 or bboxes_b.shape[1] != 4:
raise IndexError
if xyxy:
tl = torch.max(bboxes_a[:, None, :2], bboxes_b[:, :2])
br = torch.min(bboxes_a[:, None, 2:], bboxes_b[:, 2:])
area_a = torch.prod(bboxes_a[:, 2:] - bboxes_a[:, :2], 1)
area_b = torch.prod(bboxes_b[:, 2:] - bboxes_b[:, :2], 1)
else:
tl = torch.max(
(bboxes_a[:, None, :2] - bboxes_a[:, None, 2:] / 2),
(bboxes_b[:, :2] - bboxes_b[:, 2:] / 2),
)
br = torch.min(
(bboxes_a[:, None, :2] + bboxes_a[:, None, 2:] / 2),
(bboxes_b[:, :2] + bboxes_b[:, 2:] / 2),
)
area_a = torch.prod(bboxes_a[:, 2:], 1)
area_b = torch.prod(bboxes_b[:, 2:], 1)
en = (tl < br).type(tl.type()).prod(dim=2)
area_i = torch.prod(br - tl, 2) * en # * ((tl < br).all())
return area_i / (area_a[:, None] + area_b - area_i)
def matrix_iou(a, b):
"""
return iou of a and b, numpy version for data augenmentation
"""
lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
return area_i / (area_a[:, np.newaxis] + area_b - area_i + 1e-12)
def adjust_box_anns(bbox, scale_ratio, padw, padh, w_max, h_max):
bbox[:, 0::2] = np.clip(bbox[:, 0::2] * scale_ratio + padw, 0, w_max)
bbox[:, 1::2] = np.clip(bbox[:, 1::2] * scale_ratio + padh, 0, h_max)
return bbox
def xyxy2xywh(bboxes):
bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
return bboxes
def xyxy2cxcywh(bboxes):
bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
bboxes[:, 0] = bboxes[:, 0] + bboxes[:, 2] * 0.5
bboxes[:, 1] = bboxes[:, 1] + bboxes[:, 3] * 0.5
return bboxes
def cxcywh2xyxy(bboxes):
bboxes[:, 0] = bboxes[:, 0] - bboxes[:, 2] * 0.5
bboxes[:, 1] = bboxes[:, 1] - bboxes[:, 3] * 0.5
bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
return bboxes