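# NOTE: the original first three lines ("Spaces: / Sleeping / Sleeping") were
# residue from the hosting web page the file was copied from; they are not
# part of the module.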
""" | |
Loss function implementations. | |
""" | |
import numpy as np | |
import torch | |
import torch.nn as nn | |
import torch.nn.functional as F | |
from kornia.geometry import warp_perspective | |
from ..misc.geometry_utils import keypoints_to_grid, get_dist_mask, get_common_line_mask | |
def get_loss_and_weights(model_cfg, device=torch.device("cuda")):
    """Build every loss module and its weight from the model config.

    Args:
        model_cfg: Dict-like configuration. This function reads
            "weighting_policy" (default "static") and "descriptor_loss_func";
            the per-loss factories read their own keys.
        device: Device the loss modules are moved to.

    Returns:
        A pair of dicts ``(loss_func, loss_weight)``. ``loss_func`` holds
        "junc_loss" and "heatmap_loss" (plus "descriptor_loss" when
        configured); ``loss_weight`` holds "w_junc" and "w_heatmap" (plus
        "w_desc" when configured).

    Raises:
        ValueError: If the global weighting policy is neither "static" nor
            "dynamic".
    """
    # The global policy is the fallback for every per-loss policy.
    policy = model_cfg.get("weighting_policy", "static")
    if policy not in ("static", "dynamic"):
        raise ValueError("[Error] Not supported weighting policy.")

    # Junction and heatmap losses are always present.
    junc_weight, junc_module = get_junction_loss_and_weight(model_cfg, policy)
    heatmap_weight, heatmap_module = get_heatmap_loss_and_weight(
        model_cfg, policy, device
    )
    loss_func = {
        "junc_loss": junc_module.to(device),
        "heatmap_loss": heatmap_module.to(device),
    }
    loss_weight = {"w_junc": junc_weight, "w_heatmap": heatmap_weight}

    # The descriptor loss is optional and only built when requested.
    if model_cfg.get("descriptor_loss_func") is not None:
        desc_weight, desc_module = get_descriptor_loss_and_weight(model_cfg, policy)
        loss_func["descriptor_loss"] = desc_module.to(device)
        loss_weight["w_desc"] = desc_weight

    return loss_func, loss_weight
def get_junction_loss_and_weight(model_cfg, global_w_policy):
    """Build the junction loss module and its weight.

    Args:
        model_cfg: Dict-like configuration. Reads "junction_loss_cfg"
            (optional, its "policy" overrides the global one), "w_junc",
            "junction_loss_func" (default "superpoint"), "grid_size" and
            "keep_border_valid".
        global_w_policy: Weighting policy used when the junction config does
            not specify its own.

    Returns:
        ``(w_junc, junc_loss_func)``: the weight is a float32 tensor, wrapped
        in a trainable ``nn.Parameter`` under the "dynamic" policy.

    Raises:
        ValueError: On an unknown weighting policy or loss function name.
    """
    policy = model_cfg.get("junction_loss_cfg", {}).get("policy", global_w_policy)
    if policy not in ("static", "dynamic"):
        raise ValueError("[Error] Unknown weighting policy for junction loss weight.")

    # Same initial value in both modes; only "dynamic" makes it learnable.
    w_junc = torch.tensor(model_cfg["w_junc"], dtype=torch.float32)
    if policy == "dynamic":
        w_junc = nn.Parameter(w_junc, requires_grad=True)

    loss_name = model_cfg.get("junction_loss_func", "superpoint")
    if loss_name != "superpoint":
        raise ValueError("[Error] Not supported junction loss function.")
    junc_loss_func = JunctionDetectionLoss(
        model_cfg["grid_size"], model_cfg["keep_border_valid"]
    )

    return w_junc, junc_loss_func
def get_heatmap_loss_and_weight(model_cfg, global_w_policy, device):
    """Build the heatmap loss module and its weight.

    Args:
        model_cfg: Dict-like configuration. Reads "heatmap_loss_cfg"
            (optional, its "policy" overrides the global one), "w_heatmap",
            "heatmap_loss_func" (default "cross_entropy") and
            "w_heatmap_class" (default 1.0).
        global_w_policy: Weighting policy used when the heatmap config does
            not specify its own.
        device: Device on which the per-class weight tensor is created.

    Returns:
        ``(w_heatmap, heatmap_loss_func)``: the weight is a float32 tensor,
        wrapped in a trainable ``nn.Parameter`` under the "dynamic" policy.

    Raises:
        ValueError: On an unknown weighting policy or loss function name.
    """
    heatmap_loss_cfg = model_cfg.get("heatmap_loss_cfg", {})

    # Get the heatmap loss weight
    w_policy = heatmap_loss_cfg.get("policy", global_w_policy)
    if w_policy == "static":
        w_heatmap = torch.tensor(model_cfg["w_heatmap"], dtype=torch.float32)
    elif w_policy == "dynamic":
        w_heatmap = nn.Parameter(
            torch.tensor(model_cfg["w_heatmap"], dtype=torch.float32),
            requires_grad=True,
        )
    else:
        # The message previously (incorrectly) blamed the junction loss.
        raise ValueError("[Error] Unknown weighting policy for heatmap loss weight.")

    # Get the corresponding heatmap loss based on the config
    heatmap_loss_name = model_cfg.get("heatmap_loss_func", "cross_entropy")
    if heatmap_loss_name != "cross_entropy":
        raise ValueError("[Error] Not supported heatmap loss function.")

    # Per-class weight (always static): class 0 gets 1.0, class 1 gets the
    # configured value. HeatmapLoss stores it as a plain attribute, so it is
    # created on the target device directly.
    heatmap_class_w = model_cfg.get("w_heatmap_class", 1.0)
    class_weight = torch.tensor(
        [1.0, heatmap_class_w], dtype=torch.float, device=device
    )
    heatmap_loss_func = HeatmapLoss(class_weight=class_weight)

    return w_heatmap, heatmap_loss_func
def get_descriptor_loss_and_weight(model_cfg, global_w_policy):
    """Build the descriptor loss module and its weight.

    Args:
        model_cfg: Dict-like configuration. Reads "descriptor_loss_cfg"
            (its "policy" overrides the global one; "grid_size",
            "dist_threshold" and "margin" parameterize the loss), "w_desc"
            and "descriptor_loss_func" (default "regular_sampling").
        global_w_policy: Weighting policy used when the descriptor config
            does not specify its own.

    Returns:
        ``(w_descriptor, descriptor_loss_func)``: the weight is a float32
        tensor, wrapped in a trainable ``nn.Parameter`` under the "dynamic"
        policy.

    Raises:
        ValueError: On an unknown weighting policy or loss function name.
    """
    cfg = model_cfg.get("descriptor_loss_cfg", {})

    policy = cfg.get("policy", global_w_policy)
    if policy not in ("static", "dynamic"):
        raise ValueError("[Error] Unknown weighting policy for descriptor loss weight.")

    # Same initial value in both modes; only "dynamic" makes it learnable.
    w_descriptor = torch.tensor(model_cfg["w_desc"], dtype=torch.float32)
    if policy == "dynamic":
        w_descriptor = nn.Parameter(w_descriptor, requires_grad=True)

    loss_name = model_cfg.get("descriptor_loss_func", "regular_sampling")
    if loss_name != "regular_sampling":
        raise ValueError("[Error] Not supported descriptor loss function.")
    descriptor_loss_func = TripletDescriptorLoss(
        cfg["grid_size"], cfg["dist_threshold"], cfg["margin"]
    )

    return w_descriptor, descriptor_loss_func
def space_to_depth(input_tensor, grid_size):
    """Fold each grid_size x grid_size spatial cell into the channel axis.

    Args:
        input_tensor: 4-D tensor of shape (N, C, H, W).
        grid_size: Side length of the square cells.

    Returns:
        Tensor of shape (N, C * grid_size**2, H // grid_size, W // grid_size).
        The output channel index is ``(row * grid_size + col) * C + c`` for
        in-cell offset ``(row, col)`` and input channel ``c``.
    """
    batch, channels, height, width = input_tensor.shape
    cells_h = height // grid_size
    cells_w = width // grid_size

    # Split H and W into (cell index, in-cell offset).
    blocks = input_tensor.view(
        batch, channels, cells_h, grid_size, cells_w, grid_size
    )
    # Bring the two in-cell offsets in front of the channel axis.
    stacked = blocks.permute(0, 3, 5, 1, 2, 4).contiguous()
    # Merge (offset_row, offset_col, channel) into one channel axis.
    return stacked.view(batch, channels * grid_size * grid_size, cells_h, cells_w)
def junction_detection_loss(
    junction_map, junc_predictions, valid_mask=None, grid_size=8, keep_border=True
):
    """Cell-wise cross-entropy loss for junction detection.

    The ground-truth map is folded into grid_size x grid_size cells; each cell
    becomes a classification target over ``grid_size**2`` in-cell positions
    plus one extra "no junction" (dust bin) class.

    Args:
        junction_map: Ground-truth junction map, 4-D (N, C, H, W).
        junc_predictions: Logits in NCHW layout, one channel per class.
        valid_mask: Optional mask with the same shape as ``junction_map``;
            non-zero entries are valid. Defaults to all-valid.
        grid_size: Side length of the cells.
        keep_border: If True a cell counts as valid when at least one of its
            pixels is valid; otherwise at least ``grid_size**2`` valid
            entries are required.

    Returns:
        Scalar tensor: loss summed over valid cells divided by their count.
        NOTE: the division yields NaN if no cell is valid.
    """
    device = junction_map.device

    # Convert junc_map to channel tensor and append the dust-bin channel.
    # Junction entries are doubled so that a junction (2) always beats the
    # dust bin (1), which in turn beats empty positions (0).
    doubled = space_to_depth(junction_map, grid_size) * 2
    dust_bin_label = torch.ones_like(doubled[:, :1])
    junc_map = torch.cat([doubled, dust_bin_label], dim=1)

    # Small uniform noise (< 0.1) breaks ties at random when several
    # junctions fall into the same cell.
    noise = torch.distributions.Uniform(0, 0.1).sample(junc_map.shape).to(device)
    labels = torch.argmax(junc_map.to(torch.float) + noise, dim=1)

    # Also convert the valid mask to channel tensor. The default mask is
    # created on the same device as the inputs (it used to be created on CPU,
    # which failed for non-CPU inputs).
    if valid_mask is None:
        valid_mask = torch.ones(junction_map.shape, device=device)
    valid_mask = space_to_depth(valid_mask, grid_size)

    # Compute junction loss on the border patch or not
    valid_count = torch.sum(
        valid_mask.to(torch.bool).to(torch.int), dim=1, keepdim=True
    )
    if keep_border:
        cell_valid = valid_count > 0
    else:
        cell_valid = valid_count >= grid_size * grid_size
    cell_weight = torch.squeeze(cell_valid.to(torch.float), dim=1)

    # Per-cell classification loss (predictions stay in NCHW format).
    loss = F.cross_entropy(junc_predictions, labels.to(torch.long), reduction="none")

    # Average over the valid cells only.
    return torch.sum(loss * cell_weight) / torch.sum(cell_weight)
def heatmap_loss(heatmap_gt, heatmap_pred, valid_mask=None, class_weight=None):
    """Pixel-wise cross-entropy loss for heatmap prediction.

    Args:
        heatmap_gt: Ground-truth class indices; axis 1 is squeezed when it
            has size 1 before use as the target.
        heatmap_pred: Logits in NCHW layout, one channel per class.
        valid_mask: Optional mask, squeezed on axis 1 like the ground truth
            and used as per-pixel weight. Defaults to all-valid (previously
            the ``None`` default raised AttributeError).
        class_weight: Optional per-class weight tensor passed to the
            cross-entropy.

    Returns:
        Scalar tensor: masked loss summed over all pixels divided by the sum
        of the mask.
    """
    # Compute the classification loss on each pixel
    target = torch.squeeze(heatmap_gt.to(torch.long), dim=1)
    loss = F.cross_entropy(
        heatmap_pred, target, weight=class_weight, reduction="none"
    )

    if valid_mask is None:
        # Every pixel counts. Built from the loss itself so that it has the
        # right shape, dtype and device without any squeezing.
        mask = torch.ones_like(loss)
    else:
        mask = torch.squeeze(valid_mask.to(torch.float), dim=1)

    # Masked mean over the whole batch.
    return torch.sum(loss * mask) / torch.sum(mask)
class JunctionDetectionLoss(nn.Module):
    """Module wrapper around ``junction_detection_loss``."""

    def __init__(self, grid_size, keep_border):
        """Store the cell size and the border-handling flag."""
        super().__init__()
        self.keep_border = keep_border
        self.grid_size = grid_size

    def forward(self, prediction, target, valid_mask=None):
        """Return the junction loss of ``prediction`` against ``target``.

        Note the argument order: the module takes (prediction, target) while
        the underlying function takes (target, prediction).
        """
        return junction_detection_loss(
            junction_map=target,
            junc_predictions=prediction,
            valid_mask=valid_mask,
            grid_size=self.grid_size,
            keep_border=self.keep_border,
        )
class HeatmapLoss(nn.Module):
    """Module wrapper around ``heatmap_loss``."""

    def __init__(self, class_weight):
        """Store the per-class weight.

        The weight is kept as a plain attribute (not a registered buffer),
        so moving the module with ``.to(...)`` does not move it.
        """
        super().__init__()
        self.class_weight = class_weight

    def forward(self, prediction, target, valid_mask=None):
        """Return the heatmap loss of ``prediction`` against ``target``.

        Note the argument order: the module takes (prediction, target) while
        the underlying function takes (target, prediction).
        """
        return heatmap_loss(
            heatmap_gt=target,
            heatmap_pred=prediction,
            valid_mask=valid_mask,
            class_weight=self.class_weight,
        )
class RegularizationLoss(nn.Module):
    """Regularization term for dynamically weighted losses.

    The term is the sum of every loss weight that is an ``nn.Parameter``;
    weights that are plain tensors are ignored.
    """

    def __init__(self):
        super().__init__()
        self.name = "regularization_loss"
        # Zero scalar kept as a template; it is never modified.
        self.loss_init = torch.zeros([])

    def forward(self, loss_weights):
        """Return the sum of all ``nn.Parameter`` weights.

        Args:
            loss_weights: Dict of loss weights. Must contain "w_junc", whose
                device decides where the result lives.

        Returns:
            Scalar tensor; zero when no weight is an ``nn.Parameter``.
        """
        # Start from a fresh zero on the weights' device. `Tensor.to` returns
        # the very same tensor when the device already matches, so reusing
        # `self.loss_init` with an in-place `+=` made the result accumulate
        # across calls.
        device = loss_weights["w_junc"].device
        loss = torch.zeros_like(self.loss_init, device=device)
        for weight in loss_weights.values():
            if isinstance(weight, nn.Parameter):
                loss = loss + weight
        return loss
def _sample_valid_descriptors(desc_pred, grid, valid_points, n_rows):
    """Sample descriptors at `grid`, keep the valid rows and L2-normalize."""
    sampled = F.grid_sample(desc_pred, grid).permute(0, 2, 3, 1)
    sampled = sampled.reshape(n_rows, -1)[valid_points]
    return F.normalize(sampled, dim=1)


def triplet_loss(
    desc_pred1,
    desc_pred2,
    points1,
    points2,
    line_indices,
    epoch,
    grid_size=8,
    dist_threshold=8,
    init_dist_threshold=64,
    margin=1,
):
    """Regular triplet loss for descriptor learning.

    Args:
        desc_pred1, desc_pred2: Dense descriptor maps, 4-D (B, D, Hc, Wc).
        points1, points2: Keypoints in the two images, forwarded to
            ``get_dist_mask`` and ``keypoints_to_grid``.
        line_indices: 2-D tensor (B, n_points); non-zero entries mark valid
            points.
        epoch: Current epoch, used to anneal the distance threshold.
        grid_size: Ratio between image and descriptor-map resolution.
        dist_threshold: Final (smallest) distance threshold.
        init_dist_threshold: The effective threshold starts at twice this
            value and shrinks as ``2 * init_dist_threshold // (epoch + 1)``.
        margin: Triplet margin.

    Returns:
        Tuple ``(loss, grid1, grid2, valid_points)``. ``loss`` holds one
        value per valid point. When there is no valid point, ``loss`` is a
        zero scalar and both grids are ``None``. That case used to return a
        bare tensor, which broke callers indexing or unpacking the result.
    """
    b_size, _, Hc, Wc = desc_pred1.size()
    img_size = (Hc * grid_size, Wc * grid_size)
    device = desc_pred1.device

    # Extract valid keypoints
    n_points = line_indices.size()[1]
    valid_points = line_indices.bool().flatten()
    n_correct_points = torch.sum(valid_points).item()
    if n_correct_points == 0:
        zero = torch.tensor(0.0, dtype=torch.float, device=device)
        return zero, None, None, valid_points

    # Check which keypoints are too close to be matched
    # dist_threshold is decreased at each epoch for easier training
    dist_threshold = max(dist_threshold, 2 * init_dist_threshold // (epoch + 1))
    dist_mask = get_dist_mask(points1, points2, valid_points, dist_threshold)

    # Additionally ban negative mining along the same line
    common_line_mask = get_common_line_mask(line_indices, valid_points)
    dist_mask = dist_mask | common_line_mask

    # Convert the keypoints to a grid suitable for interpolation
    grid1 = keypoints_to_grid(points1, img_size)
    grid2 = keypoints_to_grid(points2, img_size)

    # Extract the (unit-norm) descriptors of the valid points
    n_rows = b_size * n_points
    desc1 = _sample_valid_descriptors(desc_pred1, grid1, valid_points, n_rows)
    desc2 = _sample_valid_descriptors(desc_pred2, grid2, valid_points, n_rows)

    # Squared L2 distance between unit vectors: ||a - b||^2 = 2 - 2 a.b
    desc_dists = 2 - 2 * (desc1 @ desc2.t())

    # Positive distance: each point against its own match (the diagonal)
    pos_dist = torch.diag(desc_dists)

    # Negative distance: closest non-matching descriptor in either direction.
    # Matches themselves and masked pairs are set to the maximum possible
    # distance (4) so they are never selected as negatives.
    max_dist = torch.tensor(4.0, dtype=torch.float, device=device)
    diag_idx = torch.arange(n_correct_points, dtype=torch.long, device=device)
    desc_dists[diag_idx, diag_idx] = max_dist
    desc_dists[dist_mask] = max_dist
    neg_dist = torch.min(
        torch.min(desc_dists, dim=1)[0], torch.min(desc_dists, dim=0)[0]
    )

    per_point_loss = F.relu(margin + pos_dist - neg_dist)
    return per_point_loss, grid1, grid2, valid_points
class TripletDescriptorLoss(nn.Module):
    """Triplet descriptor loss."""

    def __init__(self, grid_size, dist_threshold, margin, init_dist_threshold=64):
        """Store the loss hyper-parameters.

        Args:
            grid_size: Forwarded to ``triplet_loss`` as ``grid_size``.
            dist_threshold: Forwarded as ``dist_threshold``.
            margin: Forwarded as ``margin``.
            init_dist_threshold: Forwarded as ``init_dist_threshold``.
                Defaults to 64, the value that used to be hard-coded.
        """
        super().__init__()
        self.grid_size = grid_size
        self.init_dist_threshold = init_dist_threshold
        self.dist_threshold = dist_threshold
        self.margin = margin

    def forward(self, desc_pred1, desc_pred2, points1, points2, line_indices, epoch):
        """Return the mean triplet loss; see ``descriptor_loss``."""
        return self.descriptor_loss(
            desc_pred1, desc_pred2, points1, points2, line_indices, epoch
        )

    # The descriptor loss based on regularly sampled points along the lines
    def descriptor_loss(
        self, desc_pred1, desc_pred2, points1, points2, line_indices, epoch
    ):
        """Return the mean of the per-point triplet losses as a scalar."""
        result = triplet_loss(
            desc_pred1,
            desc_pred2,
            points1,
            points2,
            line_indices,
            epoch,
            self.grid_size,
            self.dist_threshold,
            self.init_dist_threshold,
            self.margin,
        )
        # `triplet_loss` normally returns a tuple whose first item is the
        # per-point loss. Also accept a bare tensor, since indexing a 0-dim
        # tensor with [0] raises IndexError.
        per_point = result[0] if isinstance(result, (tuple, list)) else result
        return torch.mean(per_point)
class TotalLoss(nn.Module):
    """Total loss summing junction, heatmap, descriptor
    and regularization losses."""

    def __init__(self, loss_funcs, loss_weights, weighting_policy):
        """Store the loss callables and weights.

        Args:
            loss_funcs: Dict with "junc_loss" and "heatmap_loss" (and
                optionally "descriptor_loss") callables. A "reg_loss" entry
                is added to this very dict.
            loss_weights: Dict with "w_junc" and "w_heatmap" (and "w_desc"
                when descriptors are used); ``nn.Parameter`` entries are
                treated as dynamic weights.
            weighting_policy: "static" or "dynamic"; used by ``forward``.
        """
        super().__init__()
        # Whether we need to compute the descriptor loss
        self.compute_descriptors = "descriptor_loss" in loss_funcs
        self.loss_funcs = loss_funcs
        self.loss_weights = loss_weights
        self.weighting_policy = weighting_policy

        # Always add regularization loss (it will return zero if not used).
        # No explicit device move: the regularizer has no parameters or
        # buffers and places its result on the device of the weights.
        self.loss_funcs["reg_loss"] = RegularizationLoss()

    def _resolve_weight(self, key):
        """Return ``(effective_weight, loggable_value)`` for one loss weight.

        A dynamic weight (``nn.Parameter`` w) is applied as ``exp(-w)`` and
        logged as a Python float; a static weight is returned unchanged for
        both.
        """
        raw = self.loss_weights[key]
        if isinstance(raw, nn.Parameter):
            weight = torch.exp(-raw)
            return weight, weight.item()
        return raw, raw

    def forward(
        self, junc_pred, junc_target, heatmap_pred, heatmap_target, valid_mask=None
    ):
        """Detection only loss.

        Returns:
            Dict with "total_loss", "junc_loss" and "heatmap_loss"; under the
            "dynamic" policy also "reg_loss" and the effective weights
            "w_junc" / "w_heatmap" as floats.

        Raises:
            ValueError: If the weighting policy is unknown.
        """
        junc_loss = self.loss_funcs["junc_loss"](junc_pred, junc_target, valid_mask)
        heatmap_loss = self.loss_funcs["heatmap_loss"](
            heatmap_pred, heatmap_target, valid_mask
        )

        if self.weighting_policy == "dynamic":
            reg_loss = self.loss_funcs["reg_loss"](self.loss_weights)
            w_junc = torch.exp(-self.loss_weights["w_junc"])
            w_heatmap = torch.exp(-self.loss_weights["w_heatmap"])
            total_loss = junc_loss * w_junc + heatmap_loss * w_heatmap + reg_loss
            return {
                "total_loss": total_loss,
                "junc_loss": junc_loss,
                "heatmap_loss": heatmap_loss,
                "reg_loss": reg_loss,
                "w_junc": w_junc.item(),
                "w_heatmap": w_heatmap.item(),
            }

        if self.weighting_policy == "static":
            total_loss = (
                junc_loss * self.loss_weights["w_junc"]
                + heatmap_loss * self.loss_weights["w_heatmap"]
            )
            return {
                "total_loss": total_loss,
                "junc_loss": junc_loss,
                "heatmap_loss": heatmap_loss,
            }

        raise ValueError("[Error] Unknown weighting policy.")

    def forward_descriptors(
        self,
        junc_map_pred1,
        junc_map_pred2,
        junc_map_target1,
        junc_map_target2,
        heatmap_pred1,
        heatmap_pred2,
        heatmap_target1,
        heatmap_target2,
        line_points1,
        line_points2,
        line_indices,
        desc_pred1,
        desc_pred2,
        epoch,
        valid_mask1=None,
        valid_mask2=None,
    ):
        """Loss for detection + description.

        The two images are concatenated along the batch axis for the
        junction and heatmap losses; the descriptor loss receives them
        separately.

        Returns:
            Dict with the three individual losses, "reg_loss", "total_loss"
            and the weights "w_junc", "w_heatmap", "w_desc". Dynamic weights
            are reported as floats, static ones as stored.
        """
        # A missing mask means "everything valid" (the None defaults used to
        # crash in torch.cat). The default takes the junction target's shape.
        # NOTE(review): this assumes the junction and heatmap targets share
        # one (N, 1, H, W) shape - confirm against the data pipeline.
        if valid_mask1 is None:
            valid_mask1 = torch.ones_like(junc_map_target1, dtype=torch.float)
        if valid_mask2 is None:
            valid_mask2 = torch.ones_like(junc_map_target2, dtype=torch.float)
        valid_mask = torch.cat([valid_mask1, valid_mask2], dim=0)

        # Junction loss and weight (dynamic or not)
        junc_loss = self.loss_funcs["junc_loss"](
            torch.cat([junc_map_pred1, junc_map_pred2], dim=0),
            torch.cat([junc_map_target1, junc_map_target2], dim=0),
            valid_mask,
        )
        w_junc, w_junc_log = self._resolve_weight("w_junc")

        # Heatmap loss and weight (dynamic or not)
        heatmap_loss = self.loss_funcs["heatmap_loss"](
            torch.cat([heatmap_pred1, heatmap_pred2], dim=0),
            torch.cat([heatmap_target1, heatmap_target2], dim=0),
            valid_mask,
        )
        w_heatmap, w_heatmap_log = self._resolve_weight("w_heatmap")

        # Descriptor loss and weight (dynamic or not)
        descriptor_loss = self.loss_funcs["descriptor_loss"](
            desc_pred1, desc_pred2, line_points1, line_points2, line_indices, epoch
        )
        w_descriptor, w_descriptor_log = self._resolve_weight("w_desc")

        # Weighted sum plus the regularization on the dynamic weights
        reg_loss = self.loss_funcs["reg_loss"](self.loss_weights)
        total_loss = (
            junc_loss * w_junc
            + heatmap_loss * w_heatmap
            + descriptor_loss * w_descriptor
            + reg_loss
        )

        return {
            "junc_loss": junc_loss,
            "heatmap_loss": heatmap_loss,
            "w_junc": w_junc_log,
            "w_heatmap": w_heatmap_log,
            "descriptor_loss": descriptor_loss,
            "w_desc": w_descriptor_log,
            "reg_loss": reg_loss,
            "total_loss": total_loss,
        }