# MIT License
#
# Copyright (c) 2022 Intelligent Systems Lab Org
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Zhenyu Li
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.cuda.amp as amp

KEY_OUTPUT = 'metric_depth'


def extract_key(prediction, key):
    # Predictions may come in as a dict of named outputs or as a bare tensor.
    if isinstance(prediction, dict):
        return prediction[key]
    return prediction

# Main loss function used for ZoeDepth. Copy/paste from the AdaBins repo
# (https://github.com/shariqfarooq123/AdaBins/blob/0952d91e9e762be310bb4cd055cbfe2448c0ce20/loss.py#L7)
class SILogLoss(nn.Module):
    """Scale-invariant log (SILog) loss, computed pixel-wise."""

    def __init__(self, beta=0.15):
        super(SILogLoss, self).__init__()
        self.name = 'SILog'
        self.beta = beta

    def forward(self, input, target, mask=None):
        input = extract_key(input, KEY_OUTPUT)
        # Restrict the loss to valid pixels when a mask is given; otherwise
        # use all pixels (the unmasked branch was missing in the original).
        if mask is not None:
            input_filtered = input[mask]
            target_filtered = target[mask]
        else:
            input_filtered = input
            target_filtered = target

        with amp.autocast(enabled=False):  # amp causes NaNs in this loss function
            alpha = 1e-7  # keeps the logs finite for zero-valued pixels
            g = torch.log(input_filtered + alpha) - torch.log(target_filtered + alpha)
            Dg = torch.var(g) + self.beta * torch.pow(torch.mean(g), 2)
            loss = 10 * torch.sqrt(Dg)

        if torch.isnan(loss):
            print("Nan SILog loss")
            print("input:", input.shape)
            print("target:", target.shape)
            print("G", torch.sum(torch.isnan(g)))
            print("Input min max", torch.min(input), torch.max(input))
            print("Target min max", torch.min(target), torch.max(target))
            print("Dg", torch.isnan(Dg))
            print("loss", torch.isnan(loss))

        return loss
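

# Minimal usage sketch for SILogLoss (not part of the original file). The
# shapes, depth range, and validity threshold below are illustrative
# assumptions, chosen only to show the expected call signature.
def _demo_silog_loss():
    criterion = SILogLoss(beta=0.15)
    pred = torch.rand(2, 1, 32, 32) * 10 + 0.1  # synthetic positive depths
    gt = torch.rand(2, 1, 32, 32) * 10 + 0.1
    valid = gt > 0.5  # synthetic validity mask
    return criterion(pred, gt, mask=valid)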


def gaussian(mu, sigma, labels):
    """Gaussian density of `labels` under N(mu, sigma^2), up to the 1/sqrt(2*pi) constant."""
    return torch.exp(-0.5 * (mu - labels) ** 2 / sigma ** 2) / sigma


def laplacian(mu, b, labels):
    """Laplacian density of `labels` with location `mu` and scale `b`."""
    return 0.5 * torch.exp(-torch.abs(mu - labels) / b) / b


def distribution(mu, sigma, labels, dist="gaussian"):
    return gaussian(mu, sigma, labels) if dist == "gaussian" else \
        laplacian(mu, sigma, labels)


def bimodal_loss(mu0, mu1, sigma0, sigma1, w0, w1, labels, dist="gaussian"):
    """Negative log-likelihood of `labels` under a two-component mixture."""
    loss = w0 * distribution(mu0, sigma0, labels, dist) + \
        w1 * distribution(mu1, sigma1, labels, dist)
    # Clamp before the log so a vanishing mixture density cannot produce inf.
    loss = torch.clamp(loss, min=1e-12)
    return -torch.log(loss)


def unimodal_loss(mu, sigma, labels):
    """Laplacian negative log-likelihood of `labels`, up to a constant."""
    return torch.abs(mu - labels) / sigma + torch.log(sigma)


def smooth_l1_loss(preds, labels, reduction='mean'):
    # `reduction` replaces the deprecated `reduce` argument; the default
    # 'mean' matches the behavior of the old `reduce=None`.
    return F.smooth_l1_loss(preds, labels, reduction=reduction)


def l1_loss(preds, labels, reduction='mean'):
    return F.l1_loss(preds, labels, reduction=reduction)


class DistributionLoss(nn.Module):
    def __init__(self, max_depth):
        super(DistributionLoss, self).__init__()
        self.name = 'DistributionLoss'
        self.max_depth = max_depth

    def forward(self, input, target, mask=None, dist='biLaplacian'):
        # Per-pixel parameters of the two-component mixture.
        mu0 = input['mu0']
        mu1 = input['mu1']
        sigma0 = input['sigma0']
        sigma1 = input['sigma1']
        pi0 = input['pi0']
        pi1 = input['pi1']

        # Pick the mode with the higher density peak (pi / sigma), then map
        # the normalized, inverted prediction back to metric depth.
        pred_mask = (pi0 / sigma0 > pi1 / sigma1).float()
        pred_depth = mu0 * pred_mask + mu1 * (1. - pred_mask)
        pred_metric_depth = (1 - pred_depth) * self.max_depth

        # Restrict everything to valid pixels when a mask is given; the
        # original left these variables undefined for mask=None.
        if mask is not None:
            mu0 = mu0[mask]
            mu1 = mu1[mask]
            sigma0 = sigma0[mask]
            sigma1 = sigma1[mask]
            pi0 = pi0[mask]
            pi1 = pi1[mask]
            pred_metric_depth = pred_metric_depth[mask]
            target = target[mask]

        real_input = mu0
        record_target = target
        # Targets are normalized to [0, 1] and inverted to match the mixture space.
        target_filtered = 1 - target / self.max_depth

        bi_loss = bimodal_loss(mu0, mu1, sigma0, sigma1, pi0, pi1,
                               target_filtered, dist=dist).mean()

        # SILog-style term (same form as SILogLoss above) between mu0 and the
        # recorded target.
        alpha = 1e-7
        beta = 0.15
        g = torch.log(real_input + alpha) - torch.log(record_target + alpha)
        Dg = torch.var(g) + beta * torch.pow(torch.mean(g), 2)
        sig_loss = 10 * torch.sqrt(Dg)

        return bi_loss, sig_loss
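

# Minimal usage sketch for DistributionLoss (not part of the original file).
# The dict keys follow the ones read in forward(); shapes, max_depth, and the
# value ranges are illustrative assumptions.
def _demo_distribution_loss():
    criterion = DistributionLoss(max_depth=10.0)
    shape = (2, 1, 16, 16)
    prediction = {
        'mu0': torch.rand(shape), 'mu1': torch.rand(shape),
        'sigma0': torch.rand(shape) * 0.4 + 0.1,
        'sigma1': torch.rand(shape) * 0.4 + 0.1,
        'pi0': torch.full(shape, 0.5), 'pi1': torch.full(shape, 0.5),
    }
    gt = torch.rand(shape) * 9.9 + 0.1  # synthetic metric depth in (0, max_depth]
    valid = gt > 0.5  # synthetic validity mask
    return criterion(prediction, gt, mask=valid)


if __name__ == "__main__":
    # Smoke-test the hedged demo sketches above on random data.
    print("SILog:", _demo_silog_loss().item())
    print("Bimodal:", _demo_bimodal_loss().item())
    bi, sig = _demo_distribution_loss()
    print("DistributionLoss:", bi.item(), sig.item())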