from pathlib import Path
from PIL import Image
try:
from tqdm import trange
except ImportError:
from builtins import range as trange
import torch, gc
import cv2
import os.path
import numpy as np
import copy
import platform
import math
# Our code
from src.misc import *
from src.common_constants import GenerationOptions as go
from src.common_constants import *
from src.stereoimage_generation import create_stereoimages
from src.normalmap_generation import create_normalmap
from src.depthmap_generation import ModelHolder
from src import backbone
try:
# 3d-photo-inpainting imports
from inpaint.mesh import write_mesh, read_mesh, output_3d_photo
from inpaint.networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
from inpaint.utils import path_planning
from inpaint.bilateral_filtering import sparse_bilateral_filtering
except Exception as e:
    print('Inpaint import failed. Inpaint will not work.')
import traceback
traceback.print_exc()
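# Module-level cache of the most recently loaded inpainted mesh, reused by run_3dphoto_videos
# so that rendering several videos from the same mesh does not re-read it from disk.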
global video_mesh_data, video_mesh_fn
video_mesh_data = None
video_mesh_fn = None
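# Keeps the loaded depth-estimation model(s) resident between calls, so repeated generations
# do not reload the weights (see ensure_models/offload/unload_models usage below).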
model_holder = ModelHolder()
def convert_to_i16(arr):
# Single channel, 16 bit image. This loses some precision!
# uint16 conversion uses round-down, therefore values should be [0; 2**16)
numbytes = 2
max_val = (2 ** (8 * numbytes))
out = np.clip(arr * max_val + 0.0001, 0, max_val - 0.1) # -0.1 from above is needed to avoid overflowing
return out.astype("uint16")
def convert_i16_to_rgb(image, like):
# three channel, 8 bits per channel image
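    # The 16-bit depth is scaled down to 8 bits and written into all three channels of an array shaped like 'like'.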
output = np.zeros_like(like)
output[:, :, 0] = image / 256.0
output[:, :, 1] = image / 256.0
output[:, :, 2] = image / 256.0
return output
class CoreGenerationFunnelInp:
"""This class takes a dictionary and creates a core_generation_funnel inp.
Non-applicable parameters are silently discarded (no error)"""
def __init__(self, values):
if isinstance(values, CoreGenerationFunnelInp):
values = values.values
values = {(k.name if isinstance(k, GenerationOptions) else k).lower(): v for k, v in values.items()}
self.values = {}
for setting in GenerationOptions:
name = setting.name.lower()
self.values[name] = values[name] if name in values else setting.df
def __getitem__(self, item):
if isinstance(item, GenerationOptions):
return self.values[item.name.lower()]
return self.values[item]
def __getattr__(self, item):
return self[item]
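# Main generation pipeline. Implemented as a generator: for every input image it yields
# (input index, output type, result) tuples - depthmaps, stereo images, normalmaps, heatmaps
# and mesh filenames - depending on which options are enabled in 'inp'.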
def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None):
if len(inputimages) == 0 or inputimages[0] is None:
return
if inputdepthmaps is None or len(inputdepthmaps) == 0:
inputdepthmaps: list[Image] = [None for _ in range(len(inputimages))]
inputdepthmaps_complete = all([x is not None for x in inputdepthmaps])
inp = CoreGenerationFunnelInp(inp)
if ops is None:
ops = backbone.gather_ops()
model_holder.update_settings(**ops)
# TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
print(SCRIPT_FULL_NAME)
print(f'Backbone: {backbone.USED_BACKBONE.name}')
backbone.unload_sd_model()
# TODO: this still should not be here
background_removed_images = []
    # remove the background from the base image before depth calculation
if inp[go.GEN_REMBG]:
if inp[go.PRE_DEPTH_BACKGROUND_REMOVAL]:
inputimages = batched_background_removal(inputimages, inp[go.REMBG_MODEL])
background_removed_images = inputimages
else:
background_removed_images = batched_background_removal(inputimages, inp[go.REMBG_MODEL])
# init torch device
if inp[go.COMPUTE_DEVICE] == 'GPU':
if torch.cuda.is_available():
device = torch.device("cuda")
else:
            print('WARNING: CUDA device was not found, CPU will be used')
device = torch.device("cpu")
else:
device = torch.device("cpu")
print("device: %s" % device)
# TODO: This should not be here
inpaint_imgs = []
inpaint_depths = []
try:
if not inputdepthmaps_complete:
print("Loading model(s) ..")
model_holder.ensure_models(inp[go.MODEL_TYPE], device, inp[go.BOOST], inp[go.TILING_MODE])
print("Computing output(s) ..")
# iterate over input images
for count in trange(0, len(inputimages)):
# Convert single channel input (PIL) images to rgb
if inputimages[count].mode == 'I':
                # 0.0039063096 ~= 1/256: rescale 16-bit intensities into the 8-bit range before converting to RGB
                inputimages[count] = inputimages[count].point(lambda p: p * 0.0039063096, mode='RGB')
inputimages[count] = inputimages[count].convert('RGB')
raw_prediction = None
"""Raw prediction, as returned by a model. None if input depthmap is used."""
raw_prediction_invert = False
"""True if near=dark on raw_prediction"""
out = None
if inputdepthmaps is not None and inputdepthmaps[count] is not None:
# use custom depthmap
dp = inputdepthmaps[count]
if isinstance(dp, Image.Image):
if dp.width != inputimages[count].width or dp.height != inputimages[count].height:
try: # LANCZOS may fail on some formats
dp = dp.resize((inputimages[count].width, inputimages[count].height), Image.Resampling.LANCZOS)
except:
dp = dp.resize((inputimages[count].width, inputimages[count].height))
# Trying desperately to rescale image to [0;1) without actually normalizing it
# Normalizing is avoided, because we want to preserve the scale of the original depthmaps
# (batch mode, video mode).
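                    # Guess the bit depth from the maximum value, so 8-, 16- and 32-bit depthmaps all end up in [0; 1).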
if len(dp.getbands()) == 1:
out = np.asarray(dp, dtype="float")
out_max = out.max()
if out_max < 256:
bit_depth = 8
elif out_max < 65536:
bit_depth = 16
else:
bit_depth = 32
out /= 2.0 ** bit_depth
else:
out = np.asarray(dp, dtype="float")[:, :, 0]
out /= 256.0
else:
# Should be in interval [0; 1], values outside of this range will be clipped.
out = np.asarray(dp, dtype="float")
assert inputimages[count].height == out.shape[0], "Custom depthmap height mismatch"
assert inputimages[count].width == out.shape[1], "Custom depthmap width mismatch"
else:
# override net size (size may be different for different images)
if inp[go.NET_SIZE_MATCH]:
# Round up to a multiple of 32 to avoid potential issues
                    # TODO: buggy for Depth Anything
net_width = (inputimages[count].width + 31) // 32 * 32
net_height = (inputimages[count].height + 31) // 32 * 32
else:
net_width = inp[go.NET_WIDTH]
net_height = inp[go.NET_HEIGHT]
raw_prediction, raw_prediction_invert = \
model_holder.get_raw_prediction(inputimages[count], net_width, net_height)
# output
if abs(raw_prediction.max() - raw_prediction.min()) > np.finfo("float").eps:
out = np.copy(raw_prediction)
# TODO: some models may output negative values, maybe these should be clamped to zero.
if raw_prediction_invert:
out *= -1
if inp[go.DO_OUTPUT_DEPTH_PREDICTION]:
yield count, 'depth_prediction', np.copy(out)
if inp[go.CLIPDEPTH]:
if inp[go.CLIPDEPTH_MODE] == 'Range':
out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1]
out = np.clip(out, inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR])
elif inp[go.CLIPDEPTH_MODE] == 'Outliers':
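                            # Clip to the [far; near] percentiles to drop outlier depth values, then renormalize to [0; 1].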
fb, nb = np.percentile(out, [inp[go.CLIPDEPTH_FAR] * 100.0, inp[go.CLIPDEPTH_NEAR] * 100.0])
out = np.clip(out, fb, nb)
out = (out - out.min()) / (out.max() - out.min()) # normalize to [0; 1]
else:
# Regretfully, the depthmap is broken and will be replaced with a black image
out = np.zeros(raw_prediction.shape)
            # Maybe we should not use img_output for everything, since we get better accuracy from
            # raw_prediction. However, it is not always available. We also want reproducibility:
            # the depthmap saved alongside an image should match what generating the depthmap again would produce.
img_output = convert_to_i16(out)
"""Depthmap (near=bright), as uint16"""
# if 3dinpainting, store maps for processing in second pass
if inp[go.GEN_INPAINTED_MESH]:
inpaint_imgs.append(inputimages[count])
inpaint_depths.append(img_output)
# applying background masks after depth
if inp[go.GEN_REMBG]:
print('applying background masks')
background_removed_image = background_removed_images[count]
# maybe a threshold cut would be better on the line below.
background_removed_array = np.array(background_removed_image)
bg_mask = (background_removed_array[:, :, 0] == 0) & (background_removed_array[:, :, 1] == 0) & (
background_removed_array[:, :, 2] == 0) & (background_removed_array[:, :, 3] <= 0.2)
img_output[bg_mask] = 0 # far value
yield count, 'background_removed', background_removed_image
if inp[go.SAVE_BACKGROUND_REMOVAL_MASKS]:
bg_array = (1 - bg_mask.astype('int8')) * 255
mask_array = np.stack((bg_array, bg_array, bg_array, bg_array), axis=2)
mask_image = Image.fromarray(mask_array.astype(np.uint8))
yield count, 'foreground_mask', mask_image
            # A weird quirk: if the user asks to save the depthmap while a custom depthmap is in use,
            # the custom depthmap will be output
if inp[go.DO_OUTPUT_DEPTH]:
img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output
if inp[go.OUTPUT_DEPTH_COMBINE]:
axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0
img_concat = Image.fromarray(np.concatenate(
(inputimages[count], convert_i16_to_rgb(img_depth, inputimages[count])),
axis=axis))
yield count, 'concat_depth', img_concat
else:
yield count, 'depth', Image.fromarray(img_depth)
if inp[go.GEN_STEREO]:
# print("Generating stereoscopic image(s)..")
stereoimages = create_stereoimages(
inputimages[count], img_output,
inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION],
inp[go.STEREO_MODES],
inp[go.STEREO_BALANCE], inp[go.STEREO_OFFSET_EXPONENT], inp[go.STEREO_FILL_ALGO])
for c in range(0, len(stereoimages)):
yield count, inp[go.STEREO_MODES][c], stereoimages[c]
if inp[go.GEN_NORMALMAP]:
normalmap = create_normalmap(
img_output,
inp[go.NORMALMAP_PRE_BLUR_KERNEL] if inp[go.NORMALMAP_PRE_BLUR] else None,
inp[go.NORMALMAP_SOBEL_KERNEL] if inp[go.NORMALMAP_SOBEL] else None,
inp[go.NORMALMAP_POST_BLUR_KERNEL] if inp[go.NORMALMAP_POST_BLUR] else None,
inp[go.NORMALMAP_INVERT]
)
yield count, 'normalmap', normalmap
if inp[go.GEN_HEATMAP]:
from dzoedepth.utils.misc import colorize
heatmap = Image.fromarray(colorize(img_output, cmap='inferno'))
yield count, 'heatmap', heatmap
# gen mesh
if inp[go.GEN_SIMPLE_MESH]:
print(f"\nGenerating (occluded) mesh ..")
basename = 'depthmap'
meshsimple_fi = get_uniquefn(outpath, basename, 'obj', 'simple')
depthi = raw_prediction if raw_prediction is not None else out
depthi_min, depthi_max = depthi.min(), depthi.max()
# try to map output to sensible values for non zoedepth models, boost, or custom maps
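            # (shift the values to be positive, scale large ranges down, then offset by 1.0 so the mesh gets a sensible depth scale)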
if inp[go.MODEL_TYPE] not in [7, 8, 9] or inp[go.BOOST] or inputdepthmaps[count] is not None:
# invert if midas
if inp[go.MODEL_TYPE] > 0 or inputdepthmaps[count] is not None: # TODO: Weird
depthi = depthi_max - depthi + depthi_min
depth_max = depthi.max()
depth_min = depthi.min()
# make positive
if depthi_min < 0:
depthi = depthi - depthi_min
depth_max = depthi.max()
depth_min = depthi.min()
# scale down
if depthi.max() > 10.0:
depthi = 4.0 * (depthi - depthi_min) / (depthi_max - depthi_min)
# offset
depthi = depthi + 1.0
mesh = create_mesh(inputimages[count], depthi, keep_edges=not inp[go.SIMPLE_MESH_OCCLUDE],
spherical=(inp[go.SIMPLE_MESH_SPHERICAL]))
mesh.export(meshsimple_fi)
yield count, 'simple_mesh', meshsimple_fi
print("Computing output(s) done.")
except Exception as e:
import traceback
if 'out of memory' in str(e).lower():
print(str(e))
suggestion = "out of GPU memory, could not generate depthmap! " \
"Here are some suggestions to work around this issue:\n"
if inp[go.BOOST]:
suggestion += " * Disable BOOST (generation will be faster, but the depthmap will be less detailed)\n"
if backbone.USED_BACKBONE != backbone.BackboneType.STANDALONE:
suggestion += " * Run DepthMap in the standalone mode - without launching the SD WebUI\n"
if device != torch.device("cpu"):
suggestion += " * Select CPU as the processing device (this will be slower)\n"
if inp[go.MODEL_TYPE] != 6:
suggestion +=\
" * Use a different model (generally, more memory-consuming models produce better depthmaps)\n"
if not inp[go.BOOST]:
suggestion += " * Reduce net size (this could reduce quality)\n"
print('Fail.\n')
raise Exception(suggestion)
else:
print('Fail.\n')
raise e
finally:
if backbone.get_opt('depthmap_script_keepmodels', True):
model_holder.offload() # Swap to CPU memory
else:
model_holder.unload_models()
gc.collect()
backbone.torch_gc()
# TODO: This should not be here
if inp[go.GEN_INPAINTED_MESH]:
try:
mesh_fi = run_3dphoto(device, inpaint_imgs, inpaint_depths, inputnames, outpath,
inp[go.GEN_INPAINTED_MESH_DEMOS],
1, "mp4")
yield 0, 'inpainted_mesh', mesh_fi
except Exception as e:
            print(f'Could not generate the inpainted mesh: {str(e)}')
backbone.reload_sd_model()
print("All done.\n")
def get_uniquefn(outpath, basename, ext, suffix=''):
basecount = backbone.get_next_sequence_number(outpath, basename)
if basecount > 0:
basecount -= 1
if suffix != '':
suffix = f'-{suffix}' # Dash is important for selecting unique filenames (see get_next_sequence_number)
for i in range(500):
fullfn = os.path.join(outpath, f"{basename}-{basecount + i:04}{suffix}.{ext}")
if not os.path.exists(fullfn):
return fullfn
return f"{basename}-99999{suffix}.{ext}" # Failback, should never be executed
def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, gen_inpainted_mesh_demos, vid_ssaa, vid_format):
mesh_fi = ''
try:
print("Running 3D Photo Inpainting .. ")
edgemodel_path = './models/3dphoto/edge_model.pth'
depthmodel_path = './models/3dphoto/depth_model.pth'
colormodel_path = './models/3dphoto/color_model.pth'
        # create the model directory if not present
os.makedirs('./models/3dphoto/', exist_ok=True)
ensure_file_downloaded(
edgemodel_path,
["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/edge-model.pth",
"https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/edge-model.pth"],
"b1d768bd008ad5fe9f540004f870b8c3d355e4939b2009aa4db493fd313217c9")
ensure_file_downloaded(
depthmodel_path,
["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/depth-model.pth",
"https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/depth-model.pth"],
"2d0e63e89a22762ddfa8bc8c9f8c992e5532b140123274ffc6e4171baa1b76f8")
ensure_file_downloaded(
colormodel_path,
["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/color-model.pth",
"https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/color-model.pth"],
"383c9b1db70097907a6f9c8abb0303e7056f50d5456a36f34ab784592b8b2c20"
)
print("Loading edge model ..")
depth_edge_model = Inpaint_Edge_Net(init_weights=True)
depth_edge_weight = torch.load(edgemodel_path, map_location=torch.device(device))
depth_edge_model.load_state_dict(depth_edge_weight)
depth_edge_model = depth_edge_model.to(device)
depth_edge_model.eval()
print("Loading depth model ..")
depth_feat_model = Inpaint_Depth_Net()
depth_feat_weight = torch.load(depthmodel_path, map_location=torch.device(device))
depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
depth_feat_model = depth_feat_model.to(device)
depth_feat_model.eval()
depth_feat_model = depth_feat_model.to(device)
print("Loading rgb model ..")
rgb_model = Inpaint_Color_Net()
rgb_feat_weight = torch.load(colormodel_path, map_location=torch.device(device))
rgb_model.load_state_dict(rgb_feat_weight)
rgb_model.eval()
rgb_model = rgb_model.to(device)
config = {}
config["gpu_ids"] = 0
config['extrapolation_thickness'] = 60
config['extrapolate_border'] = True
config['depth_threshold'] = 0.04
config['redundant_number'] = 12
config['ext_edge_threshold'] = 0.002
config['background_thickness'] = 70
config['context_thickness'] = 140
config['background_thickness_2'] = 70
config['context_thickness_2'] = 70
config['log_depth'] = True
config['depth_edge_dilate'] = 10
config['depth_edge_dilate_2'] = 5
config['largest_size'] = 512
config['repeat_inpaint_edge'] = True
config['ply_fmt'] = "bin"
config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False)
config['save_obj'] = True
if device == torch.device("cpu"):
config["gpu_ids"] = -1
for count in trange(0, len(img_rgb)):
basename = 'depthmap'
if inputnames is not None:
if inputnames[count] is not None:
p = Path(inputnames[count])
basename = p.stem
mesh_fi = get_uniquefn(outpath, basename, 'obj')
print(f"\nGenerating inpainted mesh .. (go make some coffee) ..")
# from inpaint.utils.get_MiDaS_samples
W = img_rgb[count].width
H = img_rgb[count].height
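            # Pinhole camera intrinsics: focal length max(H, W), principal point at the image centre; normalized to [0; 1] below.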
int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32)
if int_mtx.max() > 1:
int_mtx[0, :] = int_mtx[0, :] / float(W)
int_mtx[1, :] = int_mtx[1, :] / float(H)
# how inpaint.utils.read_MiDaS_depth() imports depthmap
disp = img_depth[count].astype(np.float32)
disp = disp - disp.min()
disp = cv2.blur(disp / disp.max(), ksize=(3, 3)) * disp.max()
disp = (disp / disp.max()) * 3.0
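            # Convert disparity to depth; clamp small disparities to avoid dividing by values close to zero.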
depth = 1. / np.maximum(disp, 0.05)
# rgb input
img = np.asarray(img_rgb[count])
if len(img.shape) > 2 and img.shape[2] == 4:
# convert the image from RGBA2RGB
img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
# run sparse bilateral filter
config['sparse_iter'] = 5
config['filter_size'] = [7, 7, 5, 5, 5]
config['sigma_s'] = 4.0
config['sigma_r'] = 0.5
vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), img.copy(), config,
num_iter=config['sparse_iter'], spdb=False)
depth = vis_depths[-1]
# bilat_fn = os.path.join(outpath, basename +'_bilatdepth.png')
# cv2.imwrite(bilat_fn, depth)
rt_info = write_mesh(img,
depth,
int_mtx,
mesh_fi,
config,
rgb_model,
depth_edge_model,
depth_edge_model,
depth_feat_model)
if rt_info is not False and gen_inpainted_mesh_demos:
run_3dphoto_videos(mesh_fi, basename, outpath, 300, 40,
[0.03, 0.03, 0.05, 0.03],
['double-straight-line', 'double-straight-line', 'circle', 'circle'],
[0.00, 0.00, -0.015, -0.015],
[0.00, 0.00, -0.015, -0.00],
[-0.05, -0.05, -0.05, -0.05],
['dolly-zoom-in', 'zoom-in', 'circle', 'swing'], False, vid_format, vid_ssaa)
backbone.torch_gc()
finally:
del rgb_model
rgb_model = None
del depth_edge_model
depth_edge_model = None
del depth_feat_model
depth_feat_model = None
backbone.torch_gc()
return mesh_fi
def run_3dphoto_videos(mesh_fi, basename, outpath, num_frames, fps, crop_border, traj_types, x_shift_range,
y_shift_range, z_shift_range, video_postfix, vid_dolly, vid_format, vid_ssaa):
import vispy
try:
if platform.system() == 'Windows':
vispy.use(app='PyQt5')
elif platform.system() == 'Darwin':
vispy.use('PyQt6')
else:
vispy.use(app='egl')
except:
import traceback
print(traceback.format_exc())
print('Trying an alternative...')
for u in ['PyQt5', 'PyQt6', 'egl']:
try:
vispy.use(app=u)
break
            except Exception:
                print(f'Backend {u} failed')
print(traceback.format_exc())
# Honestly, I don't know if it actually helps at all
# read ply
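    # Cache the loaded mesh between calls, so rendering several videos from the same mesh file skips re-reading it.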
global video_mesh_data, video_mesh_fn
if video_mesh_fn is None or video_mesh_fn != mesh_fi:
try:
del video_mesh_data
except:
print("del video_mesh_data failed")
video_mesh_fn = mesh_fi
video_mesh_data = read_mesh(mesh_fi)
verts, colors, faces, Height, Width, hFov, vFov, mean_loc_depth = video_mesh_data
original_w = output_w = W = Width
original_h = output_h = H = Height
int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32)
if int_mtx.max() > 1:
int_mtx[0, :] = int_mtx[0, :] / float(W)
int_mtx[1, :] = int_mtx[1, :] / float(H)
config = {}
config['video_folder'] = outpath
config['num_frames'] = num_frames
config['fps'] = fps
config['crop_border'] = crop_border
config['traj_types'] = traj_types
config['x_shift_range'] = x_shift_range
config['y_shift_range'] = y_shift_range
config['z_shift_range'] = z_shift_range
config['video_postfix'] = video_postfix
config['ssaa'] = vid_ssaa
# from inpaint.utils.get_MiDaS_samples
generic_pose = np.eye(4)
assert len(config['traj_types']) == len(config['x_shift_range']) == \
len(config['y_shift_range']) == len(config['z_shift_range']) == len(config['video_postfix']), \
"The number of elements in 'traj_types', 'x_shift_range', 'y_shift_range', 'z_shift_range' and \
'video_postfix' should be equal."
tgt_pose = [[generic_pose * 1]]
tgts_poses = []
for traj_idx in range(len(config['traj_types'])):
tgt_poses = []
sx, sy, sz = path_planning(config['num_frames'], config['x_shift_range'][traj_idx],
config['y_shift_range'][traj_idx],
config['z_shift_range'][traj_idx], path_type=config['traj_types'][traj_idx])
for xx, yy, zz in zip(sx, sy, sz):
tgt_poses.append(generic_pose * 1.)
tgt_poses[-1][:3, -1] = np.array([xx, yy, zz])
tgts_poses += [tgt_poses]
tgt_pose = generic_pose * 1
# seems we only need the depthmap to calc mean_loc_depth, which is only used when doing 'dolly'
# width and height are already in the ply file in the comments ..
# might try to add the mean_loc_depth to it too
# did just that
# mean_loc_depth = img_depth[img_depth.shape[0]//2, img_depth.shape[1]//2]
print("Generating videos ..")
normal_canvas, all_canvas = None, None
videos_poses, video_basename = copy.deepcopy(tgts_poses), basename
top = (original_h // 2 - int_mtx[1, 2] * output_h)
left = (original_w // 2 - int_mtx[0, 2] * output_w)
down, right = top + output_h, left + output_w
border = [int(xx) for xx in [top, down, left, right]]
normal_canvas, all_canvas, fn_saved = output_3d_photo(verts.copy(), colors.copy(), faces.copy(),
copy.deepcopy(Height), copy.deepcopy(Width),
copy.deepcopy(hFov), copy.deepcopy(vFov),
copy.deepcopy(tgt_pose), config['video_postfix'],
copy.deepcopy(generic_pose),
copy.deepcopy(config['video_folder']),
None, copy.deepcopy(int_mtx), config, None,
videos_poses, video_basename, original_h, original_w,
border=border, depth=None, normal_canvas=normal_canvas,
all_canvas=all_canvas,
mean_loc_depth=mean_loc_depth, dolly=vid_dolly,
fnExt=vid_format)
return fn_saved
def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa,
outpath=None, basename=None):
if len(fn_mesh) == 0 or not os.path.exists(fn_mesh):
raise Exception("Could not open mesh.")
vid_ssaa = int(vid_ssaa)
# traj type
if vid_traj == 0:
vid_traj = ['straight-line']
elif vid_traj == 1:
vid_traj = ['double-straight-line']
elif vid_traj == 2:
vid_traj = ['circle']
num_fps = int(vid_fps)
num_frames = int(vid_numframes)
shifts = vid_shift.split(',')
if len(shifts) != 3:
raise Exception("Translate requires 3 elements.")
x_shift_range = [float(shifts[0])]
y_shift_range = [float(shifts[1])]
z_shift_range = [float(shifts[2])]
borders = vid_border.split(',')
if len(borders) != 4:
raise Exception("Crop Border requires 4 elements.")
crop_border = [float(borders[0]), float(borders[1]), float(borders[2]), float(borders[3])]
if not outpath:
outpath = backbone.get_outpath()
if not basename:
# output path and filename mess ..
basename = Path(fn_mesh).stem
# unique filename
basecount = backbone.get_next_sequence_number(outpath, basename)
if basecount > 0: basecount = basecount - 1
fullfn = None
for i in range(500):
fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}"
fullfn = os.path.join(outpath, f"{fn}_." + vid_format)
if not os.path.exists(fullfn):
break
basename = Path(fullfn).stem
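    # drop the trailing underscore that was appended while probing for a unique filename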
basename = basename[:-1]
print("Loading mesh ..")
fn_saved = run_3dphoto_videos(fn_mesh, basename, outpath, num_frames, num_fps, crop_border, vid_traj, x_shift_range,
y_shift_range, z_shift_range, [''], dolly, vid_format, vid_ssaa)
return fn_saved[-1], fn_saved[-1], ''
def unload_models():
model_holder.unload_models()
# TODO: code borrowed from the internet to be marked as such and to reside in separate files
def batched_background_removal(inimages, model_name):
from rembg import new_session, remove
print('creating background masks')
outimages = []
# model path and name
bg_model_dir = Path.joinpath(Path().resolve(), "models/rem_bg")
os.makedirs(bg_model_dir, exist_ok=True)
os.environ["U2NET_HOME"] = str(bg_model_dir)
# starting a session
background_removal_session = new_session(model_name)
for count in range(0, len(inimages)):
bg_remove_img = np.array(remove(inimages[count], session=background_removal_session))
outimages.append(Image.fromarray(bg_remove_img))
# The line below might be redundant
del background_removal_session
return outimages
def pano_depth_to_world_points(depth):
"""
360 depth to world points
given 2D depth is an equirectangular projection of a spherical image
Treat depth as radius
longitude : -pi to pi
latitude : -pi/2 to pi/2
"""
# Convert depth to radius
radius = depth.flatten()
lon = np.linspace(-np.pi, np.pi, depth.shape[1])
lat = np.linspace(-np.pi / 2, np.pi / 2, depth.shape[0])
lon, lat = np.meshgrid(lon, lat)
lon = lon.flatten()
lat = lat.flatten()
# Convert to cartesian coordinates
x = radius * np.cos(lat) * np.cos(lon)
y = radius * np.cos(lat) * np.sin(lon)
z = radius * np.sin(lat)
pts3d = np.stack([x, y, z], axis=1)
return pts3d
def depth_edges_mask(depth):
"""Returns a mask of edges in the depth map.
Args:
depth: 2D numpy array of shape (H, W) with dtype float32.
Returns:
mask: 2D numpy array of shape (H, W) with dtype bool.
"""
# Compute the x and y gradients of the depth map.
depth_dx, depth_dy = np.gradient(depth)
# Compute the gradient magnitude.
depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2)
# Compute the edge mask.
mask = depth_grad > 0.05
return mask
def create_mesh(image, depth, keep_edges=False, spherical=False):
import trimesh
from dzoedepth.utils.geometry import depth_to_points, create_triangles
maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048)
# limit the size of the input image
image.thumbnail((maxsize, maxsize))
if not spherical:
pts3d = depth_to_points(depth[None])
else:
pts3d = pano_depth_to_world_points(depth)
pts3d = pts3d.reshape(-1, 3)
verts = pts3d.reshape(-1, 3)
image = np.array(image)
if keep_edges:
triangles = create_triangles(image.shape[0], image.shape[1])
else:
triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth))
colors = image.reshape(-1, 3)
mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)
# rotate 90deg over X when spherical
if spherical:
angle = math.pi / 2
direction = [1, 0, 0]
center = [0, 0, 0]
rot_matrix = trimesh.transformations.rotation_matrix(angle, direction, center)
mesh.apply_transform(rot_matrix)
return mesh