# MIT License

# Copyright (c) 2022 Intelligent Systems Lab Org

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

# File author: Shariq Farooq Bhat
"""Miscellaneous utility functions.""" | |
from scipy import ndimage | |
import base64 | |
import math | |
import re | |
from io import BytesIO | |
import matplotlib | |
import matplotlib.cm | |
import numpy as np | |
import requests | |
import torch | |
import torch.distributed as dist | |
import torch.nn | |
import torch.nn as nn | |
import torch.utils.data.distributed | |
from PIL import Image | |
from torchvision.transforms import ToTensor | |
import cv2 | |
import matplotlib | |


class RunningAverage:
    """Keeps a running average of a stream of values without storing the stream."""

    def __init__(self):
        self.avg = 0
        self.count = 0

    def append(self, value):
        self.avg = (value + self.count * self.avg) / (self.count + 1)
        self.count += 1

    def get_value(self):
        return self.avg
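
# Illustrative usage sketch (values made up), kept as a comment so importing
# this module stays side-effect free. The average is updated incrementally:
#   ra = RunningAverage()
#   for v in [1.0, 2.0, 3.0]:
#       ra.append(v)
#   ra.get_value()  # -> 2.0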


def denormalize(x):
    """Reverses the ImageNet normalization applied to the input.

    Args:
        x (torch.Tensor - shape(N,3,H,W)): input tensor

    Returns:
        torch.Tensor - shape(N,3,H,W): Denormalized input
    """
    mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(x.device)
    std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(x.device)
    return x * std + mean
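
# Illustrative sketch (random stand-in for a normalized batch): recover
# displayable pixel values from ImageNet-normalized input.
#   x = torch.randn(2, 3, 224, 224)
#   imgs = denormalize(x).clamp(0, 1)  # back to [0, 1] for visualization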


class RunningAverageDict:
    """A dictionary of running averages."""

    def __init__(self):
        self._dict = None

    def update(self, new_dict):
        if new_dict is None:
            return
        if self._dict is None:
            self._dict = {key: RunningAverage() for key in new_dict}
        for key, value in new_dict.items():
            self._dict[key].append(value)

    def get_value(self):
        if self._dict is None:
            return None
        return {key: value.get_value() for key, value in self._dict.items()}
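
# Illustrative usage sketch (metric names and values made up):
#   rad = RunningAverageDict()
#   rad.update(dict(abs_rel=0.1, rmse=0.3))
#   rad.update(dict(abs_rel=0.2, rmse=0.5))
#   rad.get_value()  # -> {'abs_rel': 0.15, 'rmse': 0.4} (up to float rounding)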


def colorize(value, vmin=None, vmax=None, cmap='turbo_r', invalid_val=-99, invalid_mask=None, background_color=(128, 128, 128, 255), gamma_corrected=False, value_transform=None):
    """Converts a depth map to a color image.

    Args:
        value (torch.Tensor, numpy.ndarray): Input depth map. Shape: (H, W) or (1, H, W) or (1, 1, H, W). All singular dimensions are squeezed.
        vmin (float, optional): vmin-valued entries are mapped to the start color of cmap. If None, the 2nd percentile of the valid values is used. Defaults to None.
        vmax (float, optional): vmax-valued entries are mapped to the end color of cmap. If None, the 85th percentile of the valid values is used. Defaults to None.
        cmap (str, optional): matplotlib colormap to use. Defaults to 'turbo_r'.
        invalid_val (int, optional): Value of invalid pixels that should be colored as 'background_color'. Defaults to -99.
        invalid_mask (numpy.ndarray, optional): Boolean mask for invalid regions. Defaults to None.
        background_color (tuple[int], optional): 4-tuple RGBA color to give to invalid pixels. Defaults to (128, 128, 128, 255).
        gamma_corrected (bool, optional): Apply gamma correction to the colored image. Defaults to False.
        value_transform (Callable, optional): Transform applied to valid pixels before coloring. Defaults to None.

    Returns:
        numpy.ndarray, dtype - uint8: Colored depth map. Shape: (H, W, 4)
    """
    if isinstance(value, torch.Tensor):
        value = value.detach().cpu().numpy()

    value = value.squeeze()
    if invalid_mask is None:
        invalid_mask = value == invalid_val
    mask = np.logical_not(invalid_mask)

    # Normalize to [0, 1] using percentile-based bounds over the valid pixels.
    vmin = np.percentile(value[mask], 2) if vmin is None else vmin
    vmax = np.percentile(value[mask], 85) if vmax is None else vmax
    if vmin != vmax:
        value = (value - vmin) / (vmax - vmin)  # vmin..vmax
    else:
        # Avoid division by zero when the valid values are constant.
        value = value * 0.

    # Grey out the invalid values.
    value[invalid_mask] = np.nan
    try:
        cmapper = matplotlib.colormaps[cmap]  # matplotlib >= 3.6
    except AttributeError:
        cmapper = matplotlib.cm.get_cmap(cmap)  # removed in matplotlib >= 3.9
    if value_transform:
        value = value_transform(value)
    value = cmapper(value, bytes=True)  # (H, W, 4)

    img = value
    img[invalid_mask] = background_color

    if gamma_corrected:
        # Gamma correction.
        img = img / 255
        img = np.power(img, 2.2)
        img = img * 255
        img = img.astype(np.uint8)
    return img
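
# Illustrative usage sketch (synthetic depth map with a pretend-invalid patch):
#   depth = np.random.uniform(0.5, 10.0, size=(480, 640)).astype(np.float32)
#   depth[:10, :10] = -99                  # matches the default invalid_val
#   colored = colorize(depth)              # (480, 640, 4) uint8 RGBA
#   Image.fromarray(colored).save("depth_vis.png")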


def count_parameters(model, include_all=False):
    return sum(p.numel() for p in model.parameters() if p.requires_grad or include_all)


def compute_errors(gt, pred):
    """Compute metrics for 'pred' compared to 'gt'.

    Args:
        gt (numpy.ndarray): Ground truth values
        pred (numpy.ndarray): Predicted values

        gt.shape should be equal to pred.shape

    Returns:
        dict: Dictionary containing the following metrics:
            'a1': Delta1 accuracy: Fraction of pixels that are within a scale factor of 1.25
            'a2': Delta2 accuracy: Fraction of pixels that are within a scale factor of 1.25^2
            'a3': Delta3 accuracy: Fraction of pixels that are within a scale factor of 1.25^3
            'abs_rel': Absolute relative error
            'rmse': Root mean squared error
            'log_10': Absolute log10 error
            'sq_rel': Squared relative error
            'rmse_log': Root mean squared error on the log scale
            'silog': Scale invariant log error
    """
    thresh = np.maximum((gt / pred), (pred / gt))
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    rmse = np.sqrt(np.mean((gt - pred) ** 2))
    rmse_log = np.sqrt(np.mean((np.log(gt) - np.log(pred)) ** 2))

    err = np.log(pred) - np.log(gt)
    silog = np.sqrt(np.mean(err ** 2) - np.mean(err) ** 2) * 100

    log_10 = (np.abs(np.log10(gt) - np.log10(pred))).mean()
    return dict(a1=a1, a2=a2, a3=a3, abs_rel=abs_rel, rmse=rmse, log_10=log_10, rmse_log=rmse_log,
                silog=silog, sq_rel=sq_rel)
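
# Illustrative sketch (synthetic, strictly positive arrays; the log-based
# metrics require gt > 0 and pred > 0):
#   gt = np.random.uniform(1.0, 10.0, size=1000)
#   pred = gt * np.random.uniform(0.9, 1.1, size=1000)
#   compute_errors(gt, pred)  # e.g. {'a1': 1.0, 'abs_rel': ..., ...}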


def shift_2d_replace(data, dx, dy, constant=False):
    """Shifts a 2D array by (dx, dy), filling the vacated border with `constant` instead of wrapping around."""
    shifted_data = np.roll(data, dx, axis=1)
    if dx < 0:
        shifted_data[:, dx:] = constant
    elif dx > 0:
        shifted_data[:, 0:dx] = constant

    shifted_data = np.roll(shifted_data, dy, axis=0)
    if dy < 0:
        shifted_data[dy:, :] = constant
    elif dy > 0:
        shifted_data[0:dy, :] = constant
    return shifted_data
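
# Illustrative sketch: shift one column right and one row down, zero-filling
# the vacated border instead of wrapping.
#   a = np.arange(9).reshape(3, 3)
#   shift_2d_replace(a, dx=1, dy=1, constant=0)
#   # -> [[0, 0, 0],
#   #     [0, 0, 1],
#   #     [0, 3, 4]]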


def soft_edge_error(pred, gt, radius=1):
    """Per-pixel error that tolerates small misalignments: each prediction is compared against the best-matching ground-truth value within a (2*radius+1)x(2*radius+1) neighborhood."""
    abs_diff = []
    for i in range(-radius, radius + 1):
        for j in range(-radius, radius + 1):
            abs_diff.append(np.abs(shift_2d_replace(gt, i, j, 0) - pred))
    return np.minimum.reduce(abs_diff)
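
# Illustrative sketch (synthetic maps): with radius=1 each pixel is compared
# against a 3x3 ground-truth neighborhood, so slightly shifted boundaries are
# not penalized as full errors.
#   gt = np.random.rand(64, 64)
#   pred = gt + 0.01 * np.random.randn(64, 64)
#   err = soft_edge_error(pred, gt, radius=1)  # (64, 64) per-pixel error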


def get_boundaries(disp, th=1., dilation=10):
    """Marks pixels where the disparity jumps by more than `th` between neighbors, optionally dilated into a thicker band."""
    edges_y = np.logical_or(np.pad(np.abs(disp[1:, :] - disp[:-1, :]) > th, ((1, 0), (0, 0))),
                            np.pad(np.abs(disp[:-1, :] - disp[1:, :]) > th, ((0, 1), (0, 0))))
    edges_x = np.logical_or(np.pad(np.abs(disp[:, 1:] - disp[:, :-1]) > th, ((0, 0), (1, 0))),
                            np.pad(np.abs(disp[:, :-1] - disp[:, 1:]) > th, ((0, 0), (0, 1))))
    edges = np.logical_or(edges_y, edges_x).astype(np.float32)

    if dilation > 0:
        kernel = np.ones((dilation, dilation), np.uint8)
        edges = cv2.dilate(edges, kernel, iterations=1)
    return edges
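
# Illustrative sketch (synthetic disparity with one sharp step edge):
#   disp = np.zeros((100, 100), dtype=np.float32)
#   disp[:, 50:] = 5.0                                # jump larger than th=1.0
#   edges = get_boundaries(disp, th=1., dilation=10)  # thick band around x=50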


def scale_shift_linear(rendered_depth, predicted_depth, mask, fuse=True, return_params=False):
    """
    Optimize a scale and shift parameter in the least squares sense, such that rendered_depth and predicted_depth match.
    Formally, solves the following objective:

        min  || (d * a + b) - d_hat ||
        a, b

    where d = predicted_depth and d_hat = rendered_depth. Note the fit is done
    directly in depth space here; the inverse-depth (disparity) variant is left
    commented out below.

    :param rendered_depth: torch.Tensor (H, W)
    :param predicted_depth: torch.Tensor (H, W)
    :param mask: torch.Tensor (H, W) - True: valid points of rendered_depth, False: invalid points of rendered_depth (ignore)
    :param fuse: whether to fuse shifted/scaled predicted_depth with the rendered_depth
    :param return_params: if True, return the fitted (a, b) instead of the corrected depth
    :return: scale/shift corrected depth
    """
    if mask.sum() == 0:
        return predicted_depth

    # rendered_disparity = 1 / rendered_depth[mask].unsqueeze(-1)
    # predicted_disparity = 1 / predicted_depth[mask].unsqueeze(-1)
    rendered_disparity = rendered_depth[mask].unsqueeze(-1)
    predicted_disparity = predicted_depth[mask].unsqueeze(-1)

    # Closed-form least squares: solve X @ [a, b]^T = y with X = [d, 1].
    X = torch.cat([predicted_disparity, torch.ones_like(predicted_disparity)], dim=1)
    XTX_inv = (X.T @ X).inverse()
    XTY = X.T @ rendered_disparity
    AB = XTX_inv @ XTY
    if return_params:
        return AB

    fixed_depth = predicted_depth * AB[0] + AB[1]
    if fuse:
        return torch.where(mask, rendered_depth, fixed_depth)
    else:
        return fixed_depth
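
# Illustrative sketch (synthetic tensors): recover a known scale/shift.
#   pred = torch.rand(48, 64) + 0.5
#   rendered = pred * 2.0 + 1.0              # ground-truth relation a=2, b=1
#   mask = torch.ones(48, 64, dtype=torch.bool)
#   ab = scale_shift_linear(rendered, pred, mask, return_params=True)
#   # ab is approximately [[2.0], [1.0]]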


def compute_metrics(gt, pred, interpolate=True, garg_crop=False, eigen_crop=True, dataset='nyu', min_depth_eval=0.1, max_depth_eval=10, disp_gt_edges=None, pred_depths=None, **kwargs):
    """Compute metrics of predicted depth maps. Applies cropping and masking as necessary or specified via arguments. Refer to compute_errors for more details on metrics.
    """
    if 'config' in kwargs:
        config = kwargs['config']
        garg_crop = config.garg_crop
        eigen_crop = config.eigen_crop
        min_depth_eval = config.min_depth_eval
        max_depth_eval = config.max_depth_eval

    if gt.shape[-2:] != pred.shape[-2:] and interpolate:
        pred = nn.functional.interpolate(
            pred.unsqueeze(dim=0).unsqueeze(dim=0), gt.shape[-2:], mode='bilinear', align_corners=True).squeeze()

    pred = pred.squeeze().cpu().numpy()
    pred[pred < min_depth_eval] = min_depth_eval
    pred[pred > max_depth_eval] = max_depth_eval
    pred[np.isinf(pred)] = max_depth_eval
    pred[np.isnan(pred)] = min_depth_eval

    gt_depth = gt.squeeze().cpu().numpy()
    valid_mask = np.logical_and(
        gt_depth > min_depth_eval, gt_depth < max_depth_eval)

    eval_mask = np.ones(valid_mask.shape)
    if garg_crop or eigen_crop:
        gt_height, gt_width = gt_depth.shape
        eval_mask = np.zeros(valid_mask.shape)

        if garg_crop:
            eval_mask[int(0.40810811 * gt_height):int(0.99189189 * gt_height),
                      int(0.03594771 * gt_width):int(0.96405229 * gt_width)] = 1

        elif eigen_crop:
            if dataset == 'kitti':
                eval_mask[int(0.3324324 * gt_height):int(0.91351351 * gt_height),
                          int(0.0359477 * gt_width):int(0.96405229 * gt_width)] = 1
            else:
                # The NYU eigen crop assumes (480, 640) images.
                eval_mask[45:471, 41:601] = 1
    valid_mask = np.logical_and(valid_mask, eval_mask)

    metrics = compute_errors(gt_depth[valid_mask], pred[valid_mask])

    # Soft edge error (SEE), evaluated only on depth-boundary pixels.
    mask = valid_mask.squeeze()
    gt = gt_depth
    see_depth = 0
    if disp_gt_edges is None:
        print("No edge map provided; computing boundaries from the ground-truth depth.")
        edges = get_boundaries(gt, th=0.08, dilation=0)
    else:
        edges = disp_gt_edges
    mask = np.logical_and(mask, edges)
    if mask.sum() > 0:
        see_depth = soft_edge_error(pred, gt)[mask].mean()
    metrics['see'] = see_depth

    return metrics
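
# Illustrative sketch (synthetic NYU-style tensors; the default eigen crop
# assumes 480x640 inputs):
#   gt = torch.rand(480, 640) * 9 + 0.5
#   pred = gt * 1.05
#   m = compute_metrics(gt, pred, dataset='nyu')  # includes the 'see' metric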

# Reference results from earlier runs (developer notes):
# base zoe:                       'abs_rel': 0.07546425755890458, 'rmse': 0.2714709522322233
# base zoe + opt:                 'abs_rel': 0.04409278385819647, 'rmse': 0.18093922881791188
# patchfusion + pred scale-shift: 'abs_rel': 0.09078774519765959, 'rmse': 0.31991247948976803
# patchfusion + gt scale-shift:   'abs_rel': 0.06316796072476771, 'rmse': 0.24189620860353886

#################################### Model utils ################################################


def parallelize(config, model, find_unused_parameters=True):
    if config.gpu is not None:
        torch.cuda.set_device(config.gpu)
        model = model.cuda(config.gpu)

    config.multigpu = False
    if config.distributed:
        # Use DDP
        config.multigpu = True
        config.rank = config.rank * config.ngpus_per_node + config.gpu
        dist.init_process_group(backend=config.dist_backend, init_method=config.dist_url,
                                world_size=config.world_size, rank=config.rank)
        config.batch_size = int(config.batch_size / config.ngpus_per_node)
        config.workers = int(
            (config.num_workers + config.ngpus_per_node - 1) / config.ngpus_per_node)
        print("Device", config.gpu, "Rank", config.rank, "batch size",
              config.batch_size, "Workers", config.workers)
        torch.cuda.set_device(config.gpu)
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        model = model.cuda(config.gpu)
        model = nn.parallel.DistributedDataParallel(model, device_ids=[config.gpu], output_device=config.gpu,
                                                    find_unused_parameters=find_unused_parameters)

    elif config.gpu is None:
        # Use DP
        config.multigpu = True
        model = model.cuda()
        model = nn.DataParallel(model)

    return model
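
# Illustrative sketch of the single-GPU path (requires a CUDA device; the
# config attributes shown are assumptions about the caller's config object,
# not a fixed API):
#   from types import SimpleNamespace
#   config = SimpleNamespace(gpu=0, distributed=False, multigpu=False)
#   model = parallelize(config, model)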

#################################################################################################


class colors:
    '''Colors class:
    Reset all colors with colors.reset
    Two subclasses fg for foreground and bg for background.
    Use as colors.subclass.colorname.
    i.e. colors.fg.red or colors.bg.green
    Also, the generic bold, disable, underline, reverse, strikethrough,
    and invisible work with the main class
    i.e. colors.bold
    '''
    reset = '\033[0m'
    bold = '\033[01m'
    disable = '\033[02m'
    underline = '\033[04m'
    reverse = '\033[07m'
    strikethrough = '\033[09m'
    invisible = '\033[08m'

    class fg:
        black = '\033[30m'
        red = '\033[31m'
        green = '\033[32m'
        orange = '\033[33m'
        blue = '\033[34m'
        purple = '\033[35m'
        cyan = '\033[36m'
        lightgrey = '\033[37m'
        darkgrey = '\033[90m'
        lightred = '\033[91m'
        lightgreen = '\033[92m'
        yellow = '\033[93m'
        lightblue = '\033[94m'
        pink = '\033[95m'
        lightcyan = '\033[96m'

    class bg:
        black = '\033[40m'
        red = '\033[41m'
        green = '\033[42m'
        orange = '\033[43m'
        blue = '\033[44m'
        purple = '\033[45m'
        cyan = '\033[46m'
        lightgrey = '\033[47m'


def printc(text, color):
    print(f"{color}{text}{colors.reset}")

############################################


def get_image_from_url(url):
    response = requests.get(url)
    img = Image.open(BytesIO(response.content)).convert("RGB")
    return img


def url_to_torch(url, size=(384, 384)):
    img = get_image_from_url(url)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the equivalent filter.
    img = img.resize(size, Image.LANCZOS)
    img = torch.from_numpy(np.asarray(img)).float()
    img = img.permute(2, 0, 1)
    img.div_(255)
    return img


def pil_to_batched_tensor(img):
    return ToTensor()(img).unsqueeze(0)
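
# Illustrative sketch (placeholder URL; requires network access):
#   img = url_to_torch("https://example.com/image.jpg")  # (3, 384, 384) in [0, 1]
#   batch = pil_to_batched_tensor(get_image_from_url("https://example.com/image.jpg"))  # (1, 3, H, W)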


def save_raw_16bit(depth, fpath="raw.png"):
    if isinstance(depth, torch.Tensor):
        depth = depth.squeeze().cpu().numpy()

    assert isinstance(depth, np.ndarray), "Depth must be a torch tensor or numpy array"
    assert depth.ndim == 2, "Depth must be 2D"
    depth = depth * 256  # scale for 16-bit png
    depth = depth.astype(np.uint16)
    depth = Image.fromarray(depth)
    depth.save(fpath)
    print("Saved raw depth to", fpath)


def generatemask(size, k_size=-1, sigma=-1, h_factor=0.03, w_factor=0.02):
    """Generates a Gaussian-blurred blending mask: 1 in the interior, falling off smoothly to 0 at the borders."""
    mask = np.zeros(size, dtype=np.float32)
    if sigma == -1:
        sigma = int(size[0] / 16)
    if k_size == -1:
        k_size = int(2 * np.ceil(2 * int(size[0] / 16)) + 1)

    mask[int(h_factor * size[0]):size[0] - int(h_factor * size[0]),
         int(w_factor * size[1]):size[1] - int(w_factor * size[1])] = 1
    mask = cv2.GaussianBlur(mask, (int(k_size), int(k_size)), sigma)
    mask = (mask - mask.min()) / (mask.max() - mask.min())
    mask = mask.astype(np.float32)
    return mask
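
# Illustrative sketch: a feathered blending mask for a 384x512 tile; values
# are 1 in the interior and fall off smoothly toward 0 at the borders.
#   mask = generatemask((384, 512))
#   mask.shape, mask.min(), mask.max()  # (384, 512), 0.0, 1.0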