|
|
|
|
|
|
|
|
|
import torch |
|
import torch.nn as nn |
|
from torch.autograd import Variable |
|
from library.ssd import jaccard, intersect |
|
import numpy as np |
|
|
|
class SSDSingleClassLoss(nn.Module):
    """SSD loss for a single foreground class (person vs. background).

    Compute targets:

    1) Produce indices for positive matches by matching ground-truth boxes
       with (default) 'priorboxes' whose jaccard index >= the threshold
       parameter (default threshold: 0.5).

    2) Calculate location and confidence loss for positive matches.

    3) Hard negative mining to filter the excessive number of negative
       examples that comes with using a large number of default boxes:
       - Negative-match background CFs are sorted ascending (least confident
         predictions first).
       - If a positive match exists, Nneg = Mining_Neg2PosRatio * Npos,
         clipped below by min_NegMiningSample. The Nneg smallest background
         CFs are selected; CFs above maxBackroundCFforLossCalc are omitted
         from the loss calculation.
       - If there is no positive match (including the no-ground-truth case),
         the min_NegMiningSample least confident background CFs are taken
         into the loss.

    Objective loss:

        L(x,c,l,g) = [LconfPosMatch(x,c) / Npos]
                   + [(lambda * LconfNegMatch(x,c)) / Nneg]
                   + [(alpha * Lloc(x,l,g)) / Npos]

    where LconfPosMatch is the log softmax person-class confidence loss of
    positively matched boxes, LconfNegMatch is the log softmax background
    confidence loss of negatively matched boxes, and Lloc is the SmoothL1
    loss weighted by alpha (set to 1 by cross-validation in the original
    multiclass SSD).

    Symbols:
        c: class confidences
        l: predicted boxes
        g: ground-truth boxes
        Npos: number of matched default boxes
        Nneg: number of negative matches used in the loss after mining
        x: positive match selector
    """

    def __init__(self, Anchor_box_wh, Anchor_box_xy, alpha = 1, Jaccardtreshold = 0.5,
                 Mining_Neg2PosRatio = 6, min_NegMiningSample = 10, maxBackroundCFforLossCalc = 0.5, negConfLosslambda = 1.0,
                 regularizedLayers = None):
        '''
        Args:
            Anchor_box_wh: (tensor) Anchor boxes in (cx, cy, w, h) form in the
                original image. Shape: [numPreds, 4] (e.g. numPreds=5376).
            Anchor_box_xy: (tensor) Anchor boxes in (xmin, ymin, xmax, ymax)
                form in the original image. Shape: [numPreds, 4].
            alpha: weight of the localization (SmoothL1) loss term.
            Jaccardtreshold: minimum IoU for an anchor/GT pair to count as a
                positive match.
            Mining_Neg2PosRatio: negatives kept per positive in hard mining.
            min_NegMiningSample: lower bound on the number of mined negatives.
            maxBackroundCFforLossCalc: mined negatives whose background
                confidence is >= this value are dropped from the loss.
            negConfLosslambda: weight of the negative confidence loss term.
            regularizedLayers: optional iterable of (layer_name, lambda)
                pairs; each named layer of the model passed to forward() gets
                an L2 penalty on its `.op` weights/bias, normalized by the
                parameter count.
        '''
        super(SSDSingleClassLoss, self).__init__()
        # All intermediate tensors created by this module live on this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        self.Anchor_box_wh = Anchor_box_wh
        self.Anchor_box_xy = Anchor_box_xy
        self.alpha = alpha
        self.Jaccardtreshold = Jaccardtreshold

        self.Mining_Neg2PosRatio = Mining_Neg2PosRatio
        self.min_NegMiningSample = min_NegMiningSample
        self.maxBackroundCFforLossCalc = maxBackroundCFforLossCalc
        self.negConfLosslambda = negConfLosslambda

        self.regularizedLayers = regularizedLayers

        # SSD box-encoding variances (standard values from the original SSD).
        self.var_x = 0.1
        self.var_y = 0.1
        self.var_w = 0.2
        self.var_h = 0.2

    def forward(self, pred_box_delt, pred_CF, GT_box_wh, model=None):
        """Multibox loss.

        Args:
            pred_box_delt: (tensor) Location predictions in delta form
                (dcx, dcy, dw, dh). Shape: [numPreds, 4].
            pred_CF: (tensor) Confidence predictions (person, background),
                assumed to already be probabilities (log is taken directly).
                Shape: [numPreds, 2].
            GT_box_wh: (tensor) Ground-truth boxes; column 0 is an id column
                that is stripped, columns 1:5 are (xmin, ymin, w, h).
                Shape: [numObjects, 5]. May be empty.
            model: optional model whose `regularizedLayers` get L2-penalized.

        Returns:
            (loss_l + reg, loss_c): localization loss (plus optional layer
            regularization) and confidence loss, both 0-dim tensors on
            self.device.
        """
        device = self.device
        alpha = self.alpha
        Jaccardtreshold = self.Jaccardtreshold
        Mining_Neg2PosRatio = self.Mining_Neg2PosRatio
        min_NegMiningSample = self.min_NegMiningSample
        maxBackroundCFforLossCalc = self.maxBackroundCFforLossCalc
        negConfLosslambda = self.negConfLosslambda

        reg = torch.tensor(.0).to(device)

        if len(GT_box_wh) == 0:
            # No ground truth: there can be no positive matches. Fall through
            # to the negative-only branch below (previously `loss_c` was left
            # undefined here, crashing at the return statement).
            num_pos = 0
        else:
            # Drop the leading id column; keep (xmin, ymin, w, h).
            GT_box_wh = GT_box_wh[:, 1:]

            # GT in (cx, cy, w, h) form, used for offset encoding.
            GT_box_cxcy_wh = GT_box_wh.clone().to(device)
            GT_box_cxcy_wh[:, 0] = GT_box_wh[:, 0] + GT_box_wh[:, 2] / 2
            GT_box_cxcy_wh[:, 1] = GT_box_wh[:, 1] + GT_box_wh[:, 3] / 2

            # GT in corner (xmin, ymin, xmax, ymax) form, used for IoU.
            GT_box_xy = GT_box_wh.detach().clone().to(device)
            GT_box_xy[:, 2] = GT_box_wh[:, 2] + GT_box_wh[:, 0]
            GT_box_xy[:, 3] = GT_box_wh[:, 3] + GT_box_wh[:, 1]

            # IoU of every anchor against every GT box: [numPreds, numObjects].
            JaccardIndices = jaccard(self.Anchor_box_xy, GT_box_xy)
            posMatches = torch.nonzero(JaccardIndices >= Jaccardtreshold)
            # An anchor is a negative only if it matches NO ground-truth box.
            negMatchAnchIdx = torch.nonzero(JaccardIndices.max(dim=1).values < Jaccardtreshold).flatten()

            posMatchAnchIdx = posMatches[:, 0]
            posMatchGTIdx = posMatches[:, 1]

            pred_backGrCF = pred_CF[:, 1]
            negMatch_pred_backGrCF = pred_backGrCF[negMatchAnchIdx]

            num_pos = posMatches.shape[0]

        if num_pos:
            posMatchAnchs = self.Anchor_box_wh[posMatchAnchIdx]
            posMatch_pred_box_delt = pred_box_delt[posMatchAnchIdx]
            posMatch_pred_CF = pred_CF[posMatchAnchIdx][:, 0]

            posMatchGTs = GT_box_cxcy_wh[posMatchGTIdx]

            # Encode GT boxes relative to their matched anchors (SSD offset
            # encoding, divided by the fixed variances).
            ghat_cx = (posMatchGTs[:, 0] - posMatchAnchs[:, 0]) / posMatchAnchs[:, 2] / self.var_x
            ghat_cy = (posMatchGTs[:, 1] - posMatchAnchs[:, 1]) / posMatchAnchs[:, 3] / self.var_y
            ghat_w = torch.log(posMatchGTs[:, 2] / posMatchAnchs[:, 2]) / self.var_w
            ghat_h = torch.log(posMatchGTs[:, 3] / posMatchAnchs[:, 3]) / self.var_h
            ghat = torch.cat((ghat_cx.unsqueeze(1), ghat_cy.unsqueeze(1),
                              ghat_w.unsqueeze(1), ghat_h.unsqueeze(1)), dim=1)

            # Localization loss over all positive matches (sum-reduced, then
            # normalized by num_pos below).
            smoothL1 = torch.nn.SmoothL1Loss(reduction='sum', beta=1.0).to(device)
            loc_loss = smoothL1(posMatch_pred_box_delt.view(1, -1), ghat.view(1, -1))

            # Positive confidence loss: NLL of the person-class confidence.
            # NOTE(review): confidences of exactly 0 would yield inf here;
            # assumes upstream outputs are strictly positive probabilities.
            posMatch_CF_loss = -torch.log(posMatch_pred_CF).sum()

            # --- Hard negative mining ---
            # Least confident background predictions first.
            negMatch_pred_backGrCF, _ = negMatch_pred_backGrCF.sort(0, descending=False)

            num_hardmined_negative = int(np.max([num_pos * Mining_Neg2PosRatio, min_NegMiningSample]))
            num_hardmined_negative = int(np.min([num_hardmined_negative, negMatch_pred_backGrCF.shape[0]]))
            negMatch_pred_backGrCF_mined = negMatch_pred_backGrCF[0:num_hardmined_negative]

            # Drop negatives the model is already confident about.
            negMatch_pred_backGrCF_mined = negMatch_pred_backGrCF_mined[negMatch_pred_backGrCF_mined < maxBackroundCFforLossCalc]
            num_hardmined_negative = negMatch_pred_backGrCF_mined.shape[0]

            if num_hardmined_negative == 0:
                # Created on `device` so the sum with posMatch_CF_loss works
                # under CUDA as well as CPU.
                negMatch_CF_loss = torch.tensor(.0).to(device)
            else:
                negMatch_CF_loss = (-torch.log(negMatch_pred_backGrCF_mined).sum()
                                    / num_hardmined_negative) * negConfLosslambda

            loss_l = alpha * loc_loss / num_pos
            posMatch_CF_loss = posMatch_CF_loss / num_pos
            loss_c = posMatch_CF_loss + negMatch_CF_loss

        else:
            # No positive match (or no ground truth at all): the loss is
            # background-only. No localization loss is defined.
            loss_l = torch.tensor(.0).to(device)

            negCFs_sorted, _ = pred_CF[:, 1].view(-1, 1).sort(0, descending=False)
            # Clamp to the number of available predictions so the mean below
            # uses the true count (and guard against an empty prediction set).
            num_hardmined_negative = int(np.min([min_NegMiningSample, negCFs_sorted.shape[0]]))
            if num_hardmined_negative == 0:
                loss_c = torch.tensor(.0).to(device)
            else:
                negMatch_pred_backGrCF_mined = negCFs_sorted[0:num_hardmined_negative]
                negMatch_CF_loss = -torch.log(negMatch_pred_backGrCF_mined).sum()
                loss_c = (negMatch_CF_loss / num_hardmined_negative) * negConfLosslambda

        # Optional per-layer L2 regularization, normalized by parameter count.
        if model is not None and self.regularizedLayers is not None:
            for layer, lamb in self.regularizedLayers:
                layer_attribute = getattr(model, layer)
                m = layer_attribute.op.weight.numel() + layer_attribute.op.bias.numel()
                reg += ((layer_attribute.op.bias.view(1, -1) ** 2).sum()
                        + (layer_attribute.op.weight.view(1, -1) ** 2).sum()) * lamb / m

        return loss_l + reg, loss_c