from pathlib import Path
from PIL import Image

try:
    from tqdm import trange
except ImportError:
    from builtins import range as trange

import gc
import torch
import cv2
import os.path
import numpy as np
import copy
import platform
import math

# Our code
from src.misc import *
from src.common_constants import GenerationOptions as go
from src.common_constants import *
from src.stereoimage_generation import create_stereoimages
from src.normalmap_generation import create_normalmap
from src.depthmap_generation import ModelHolder
from src import backbone

try:
    # 3d-photo-inpainting imports
    from inpaint.mesh import write_mesh, read_mesh, output_3d_photo
    from inpaint.networks import Inpaint_Color_Net, Inpaint_Depth_Net, Inpaint_Edge_Net
    from inpaint.utils import path_planning
    from inpaint.bilateral_filtering import sparse_bilateral_filtering
except Exception as e:
    print('Inpaint import failed. Inpainting will not work.')
    import traceback
    traceback.print_exc()

global video_mesh_data, video_mesh_fn
video_mesh_data = None
video_mesh_fn = None

model_holder = ModelHolder()


def convert_to_i16(arr):
    # Single channel, 16 bit image. This loses some precision!
    # uint16 conversion uses round-down, therefore values should be in [0; 2**16)
    numbytes = 2
    max_val = (2 ** (8 * numbytes))
    out = np.clip(arr * max_val + 0.0001, 0, max_val - 0.1)  # -0.1 from above is needed to avoid overflowing
    return out.astype("uint16")


def convert_i16_to_rgb(image, like):
    # three channel, 8 bits per channel image
    output = np.zeros_like(like)
    output[:, :, 0] = image / 256.0
    output[:, :, 1] = image / 256.0
    output[:, :, 2] = image / 256.0
    return output
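
# A minimal sketch of the round-trip these two helpers implement (illustrative only,
# not called anywhere; the 4x4 shape is arbitrary):
#   depth = np.random.rand(4, 4)                 # float depthmap, scaled to [0; 1)
#   i16 = convert_to_i16(depth)                  # uint16, near=bright
#   rgb = convert_i16_to_rgb(i16, np.zeros((4, 4, 3), dtype=np.uint8))
#   assert i16.dtype == np.uint16 and (rgb[:, :, 0] == rgb[:, :, 1]).all()
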
class CoreGenerationFunnelInp:
    """This class takes a dictionary and creates a core_generation_funnel inp.
    Non-applicable parameters are silently discarded (no error)."""
    def __init__(self, values):
        if isinstance(values, CoreGenerationFunnelInp):
            values = values.values
        values = {(k.name if isinstance(k, GenerationOptions) else k).lower(): v for k, v in values.items()}

        self.values = {}
        for setting in GenerationOptions:
            name = setting.name.lower()
            self.values[name] = values[name] if name in values else setting.df

    def __getitem__(self, item):
        if isinstance(item, GenerationOptions):
            return self.values[item.name.lower()]
        return self.values[item]

    def __getattr__(self, item):
        return self[item]


def core_generation_funnel(outpath, inputimages, inputdepthmaps, inputnames, inp, ops=None):
    if len(inputimages) == 0 or inputimages[0] is None:
        return
    if inputdepthmaps is None or len(inputdepthmaps) == 0:
        inputdepthmaps: list[Image.Image] = [None for _ in range(len(inputimages))]
    inputdepthmaps_complete = all([x is not None for x in inputdepthmaps])

    inp = CoreGenerationFunnelInp(inp)

    if ops is None:
        ops = backbone.gather_ops()
    model_holder.update_settings(**ops)

    # TODO: ideally, run_depthmap should not save meshes - that makes the function not pure
    print(SCRIPT_FULL_NAME)
    print(f'Backbone: {backbone.USED_BACKBONE.name}')

    backbone.unload_sd_model()  # TODO: this still should not be here

    background_removed_images = []
    # remove background from the base image before depth calculation
    if inp[go.GEN_REMBG]:
        if inp[go.PRE_DEPTH_BACKGROUND_REMOVAL]:
            inputimages = batched_background_removal(inputimages, inp[go.REMBG_MODEL])
            background_removed_images = inputimages
        else:
            background_removed_images = batched_background_removal(inputimages, inp[go.REMBG_MODEL])

    # init torch device
    if inp[go.COMPUTE_DEVICE] == 'GPU':
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            print('WARNING: Cuda device was not found, cpu will be used')
            device = torch.device("cpu")
    else:
        device = torch.device("cpu")
    print(f"device: {device}")

    # TODO: This should not be here
    inpaint_imgs = []
    inpaint_depths = []

    try:
        if not inputdepthmaps_complete:
            print("Loading model(s) ..")
            model_holder.ensure_models(inp[go.MODEL_TYPE], device, inp[go.BOOST], inp[go.TILING_MODE])
        print("Computing output(s) ..")
        # iterate over input images
        for count in trange(0, len(inputimages)):
            # Convert single channel input (PIL) images to rgb
            if inputimages[count].mode == 'I':
                # rescale 32-bit integer values into 8-bit range (factor ~1/256) before the RGB conversion,
                # otherwise the values would clip
                inputimages[count] = inputimages[count].point(lambda p: p * 0.0039063096).convert('RGB')

            raw_prediction = None
            """Raw prediction, as returned by a model. None if input depthmap is used."""
            raw_prediction_invert = False
            """True if near=dark on raw_prediction"""
            out = None

            if inputdepthmaps is not None and inputdepthmaps[count] is not None:
                # use custom depthmap
                dp = inputdepthmaps[count]
                if isinstance(dp, Image.Image):
                    if dp.width != inputimages[count].width or dp.height != inputimages[count].height:
                        try:  # LANCZOS may fail on some formats
                            dp = dp.resize((inputimages[count].width, inputimages[count].height),
                                           Image.Resampling.LANCZOS)
                        except Exception:
                            dp = dp.resize((inputimages[count].width, inputimages[count].height))
                    # Trying to rescale the image to [0; 1) without actually normalizing it.
                    # Normalizing is avoided, because we want to preserve the scale of the original depthmaps
                    # (batch mode, video mode).
                    if len(dp.getbands()) == 1:
                        out = np.asarray(dp, dtype="float")
                        out_max = out.max()
                        if out_max < 256:
                            bit_depth = 8
                        elif out_max < 65536:
                            bit_depth = 16
                        else:
                            bit_depth = 32
                        out /= 2.0 ** bit_depth
                    else:
                        out = np.asarray(dp, dtype="float")[:, :, 0]
                        out /= 256.0
                else:
                    # Should be in interval [0; 1], values outside of this range will be clipped.
                    out = np.asarray(dp, dtype="float")
                assert inputimages[count].height == out.shape[0], "Custom depthmap height mismatch"
                assert inputimages[count].width == out.shape[1], "Custom depthmap width mismatch"
            else:
                # override net size (size may be different for different images)
                if inp[go.NET_SIZE_MATCH]:
                    # Round up to a multiple of 32 to avoid potential issues
                    # TODO: bugs for Depth Anything
                    net_width = (inputimages[count].width + 31) // 32 * 32
                    net_height = (inputimages[count].height + 31) // 32 * 32
                else:
                    net_width = inp[go.NET_WIDTH]
                    net_height = inp[go.NET_HEIGHT]
                raw_prediction, raw_prediction_invert = \
                    model_holder.get_raw_prediction(inputimages[count], net_width, net_height)

                # output
                if abs(raw_prediction.max() - raw_prediction.min()) > np.finfo("float").eps:
                    out = np.copy(raw_prediction)
                    # TODO: some models may output negative values, maybe these should be clamped to zero.
                    if raw_prediction_invert:
                        out *= -1
                    if inp[go.DO_OUTPUT_DEPTH_PREDICTION]:
                        yield count, 'depth_prediction', np.copy(out)
                    if inp[go.CLIPDEPTH]:
                        if inp[go.CLIPDEPTH_MODE] == 'Range':
                            out = (out - out.min()) / (out.max() - out.min())  # normalize to [0; 1]
                            out = np.clip(out, inp[go.CLIPDEPTH_FAR], inp[go.CLIPDEPTH_NEAR])
                        elif inp[go.CLIPDEPTH_MODE] == 'Outliers':
                            fb, nb = np.percentile(out, [inp[go.CLIPDEPTH_FAR] * 100.0,
                                                         inp[go.CLIPDEPTH_NEAR] * 100.0])
                            out = np.clip(out, fb, nb)
                        out = (out - out.min()) / (out.max() - out.min())  # normalize to [0; 1]
                else:
                    # Regretfully, the depthmap is broken and will be replaced with a black image
                    out = np.zeros(raw_prediction.shape)
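            # Illustration of the two clip modes handled above (numbers are made up): with
            # CLIPDEPTH_FAR=0.1 and CLIPDEPTH_NEAR=0.9, 'Range' stretches out to [0; 1] and
            # clips it to [0.1; 0.9], flattening the nearest/farthest tenths of the value
            # range, while 'Outliers' clips to the 10th/90th percentiles, so a handful of
            # extreme pixels cannot eat up the whole range; both end renormalized to [0; 1].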
            # Maybe we should not use img_output for everything, since the raw_prediction gives better
            # accuracy. However, it is not always supported. We may also want reproducibility: the saved
            # depthmap should be identical to the one obtained by generating it for the same image again.
            img_output = convert_to_i16(out)
            """Depthmap (near=bright), as uint16"""

            # if 3dinpainting, store maps for processing in second pass
            if inp[go.GEN_INPAINTED_MESH]:
                inpaint_imgs.append(inputimages[count])
                inpaint_depths.append(img_output)

            # applying background masks after depth
            if inp[go.GEN_REMBG]:
                print('applying background masks')
                background_removed_image = background_removed_images[count]
                # maybe a threshold cut would be better on the line below.
                background_removed_array = np.array(background_removed_image)
                bg_mask = (background_removed_array[:, :, 0] == 0) & (background_removed_array[:, :, 1] == 0) & \
                          (background_removed_array[:, :, 2] == 0) & (background_removed_array[:, :, 3] <= 0.2)
                img_output[bg_mask] = 0  # far value

                yield count, 'background_removed', background_removed_image

                if inp[go.SAVE_BACKGROUND_REMOVAL_MASKS]:
                    bg_array = (1 - bg_mask.astype('int8')) * 255
                    mask_array = np.stack((bg_array, bg_array, bg_array, bg_array), axis=2)
                    mask_image = Image.fromarray(mask_array.astype(np.uint8))
                    yield count, 'foreground_mask', mask_image

            # A quirk: if the user asks to save the depthmap while a custom depthmap is used,
            # the custom depthmap will be output
            if inp[go.DO_OUTPUT_DEPTH]:
                img_depth = cv2.bitwise_not(img_output) if inp[go.OUTPUT_DEPTH_INVERT] else img_output
                if inp[go.OUTPUT_DEPTH_COMBINE]:
                    axis = 1 if inp[go.OUTPUT_DEPTH_COMBINE_AXIS] == 'Horizontal' else 0
                    img_concat = Image.fromarray(np.concatenate(
                        (inputimages[count], convert_i16_to_rgb(img_depth, inputimages[count])),
                        axis=axis))
                    yield count, 'concat_depth', img_concat
                else:
                    yield count, 'depth', Image.fromarray(img_depth)

            if inp[go.GEN_STEREO]:
                # print("Generating stereoscopic image(s)..")
                stereoimages = create_stereoimages(
                    inputimages[count], img_output,
                    inp[go.STEREO_DIVERGENCE], inp[go.STEREO_SEPARATION],
                    inp[go.STEREO_MODES],
                    inp[go.STEREO_BALANCE], inp[go.STEREO_OFFSET_EXPONENT], inp[go.STEREO_FILL_ALGO])
                for c in range(0, len(stereoimages)):
                    yield count, inp[go.STEREO_MODES][c], stereoimages[c]

            if inp[go.GEN_NORMALMAP]:
                normalmap = create_normalmap(
                    img_output,
                    inp[go.NORMALMAP_PRE_BLUR_KERNEL] if inp[go.NORMALMAP_PRE_BLUR] else None,
                    inp[go.NORMALMAP_SOBEL_KERNEL] if inp[go.NORMALMAP_SOBEL] else None,
                    inp[go.NORMALMAP_POST_BLUR_KERNEL] if inp[go.NORMALMAP_POST_BLUR] else None,
                    inp[go.NORMALMAP_INVERT]
                )
                yield count, 'normalmap', normalmap

            if inp[go.GEN_HEATMAP]:
                from dzoedepth.utils.misc import colorize
                heatmap = Image.fromarray(colorize(img_output, cmap='inferno'))
                yield count, 'heatmap', heatmap

            # gen mesh
            if inp[go.GEN_SIMPLE_MESH]:
                print("\nGenerating (occluded) mesh ..")
                basename = 'depthmap'
                meshsimple_fi = get_uniquefn(outpath, basename, 'obj', 'simple')

                depthi = raw_prediction if raw_prediction is not None else out
                depthi_min, depthi_max = depthi.min(), depthi.max()
                # try to map output to sensible values for non-zoedepth models, boost, or custom maps
                if inp[go.MODEL_TYPE] not in [7, 8, 9] or inp[go.BOOST] or inputdepthmaps[count] is not None:
                    # invert if midas
                    if inp[go.MODEL_TYPE] > 0 or inputdepthmaps[count] is not None:  # TODO: Weird
                        depthi = depthi_max - depthi + depthi_min
                        depth_max = depthi.max()
                        depth_min = depthi.min()
                    # make positive
                    if depthi_min < 0:
                        depthi = depthi - depthi_min
                        depth_max = depthi.max()
                        depth_min = depthi.min()
                    # scale down
                    if depthi.max() > 10.0:
                        depthi = 4.0 * (depthi - depthi_min) / (depthi_max - depthi_min)
                    # offset
                    depthi = depthi + 1.0
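                # Made-up example of the remapping above: a MiDaS-style inverse-depth map
                # with values in [0; 4000] is flipped (so that near pixels get small
                # values), kept positive, squeezed into [0; 4] and offset by 1, leaving
                # create_mesh with plausible depth values in roughly [1; 5] mesh units.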
                mesh = create_mesh(inputimages[count], depthi, keep_edges=not inp[go.SIMPLE_MESH_OCCLUDE],
                                   spherical=(inp[go.SIMPLE_MESH_SPHERICAL]))
                mesh.export(meshsimple_fi)
                yield count, 'simple_mesh', meshsimple_fi

        print("Computing output(s) done.")
    except Exception as e:
        if 'out of memory' in str(e).lower():
            print(str(e))
            suggestion = "out of GPU memory, could not generate depthmap! " \
                         "Here are some suggestions to work around this issue:\n"
            if inp[go.BOOST]:
                suggestion += " * Disable BOOST (generation will be faster, but the depthmap will be less detailed)\n"
            if backbone.USED_BACKBONE != backbone.BackboneType.STANDALONE:
                suggestion += " * Run DepthMap in the standalone mode - without launching the SD WebUI\n"
            if device != torch.device("cpu"):
                suggestion += " * Select CPU as the processing device (this will be slower)\n"
            if inp[go.MODEL_TYPE] != 6:
                suggestion += \
                    " * Use a different model (generally, more memory-consuming models produce better depthmaps)\n"
            if not inp[go.BOOST]:
                suggestion += " * Reduce net size (this could reduce quality)\n"
            print('Fail.\n')
            raise Exception(suggestion)
        else:
            print('Fail.\n')
            raise e
    finally:
        if backbone.get_opt('depthmap_script_keepmodels', True):
            model_holder.offload()  # Swap to CPU memory
        else:
            model_holder.unload_models()
        gc.collect()
        backbone.torch_gc()

    # TODO: This should not be here
    if inp[go.GEN_INPAINTED_MESH]:
        try:
            mesh_fi = run_3dphoto(device, inpaint_imgs, inpaint_depths, inputnames, outpath,
                                  inp[go.GEN_INPAINTED_MESH_DEMOS], 1, "mp4")
            yield 0, 'inpainted_mesh', mesh_fi
        except Exception as e:
            print(f'Could not generate inpainted mesh: {str(e)}')

    backbone.reload_sd_model()
    print("All done.\n")


def get_uniquefn(outpath, basename, ext, suffix=''):
    """Picks a filename of the form <basename>-NNNN[-suffix].<ext> that does not yet exist in outpath."""
    basecount = backbone.get_next_sequence_number(outpath, basename)
    if basecount > 0:
        basecount -= 1
    if suffix != '':
        suffix = f'-{suffix}'  # The dash is important for selecting unique filenames (see get_next_sequence_number)
    for i in range(500):
        fullfn = os.path.join(outpath, f"{basename}-{basecount + i:04}{suffix}.{ext}")
        if not os.path.exists(fullfn):
            return fullfn
    return os.path.join(outpath, f"{basename}-99999{suffix}.{ext}")  # Fallback, should never be reached


def run_3dphoto(device, img_rgb, img_depth, inputnames, outpath, gen_inpainted_mesh_demos, vid_ssaa, vid_format):
    mesh_fi = ''
    rgb_model = depth_edge_model = depth_feat_model = None
    try:
        print("Running 3D Photo Inpainting ..")
        edgemodel_path = './models/3dphoto/edge_model.pth'
        depthmodel_path = './models/3dphoto/depth_model.pth'
        colormodel_path = './models/3dphoto/color_model.pth'
        # create paths to model if not present
        os.makedirs('./models/3dphoto/', exist_ok=True)
        ensure_file_downloaded(
            edgemodel_path,
            ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/edge-model.pth",
             "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/edge-model.pth"],
            "b1d768bd008ad5fe9f540004f870b8c3d355e4939b2009aa4db493fd313217c9")
        ensure_file_downloaded(
            depthmodel_path,
            ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/depth-model.pth",
             "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/depth-model.pth"],
            "2d0e63e89a22762ddfa8bc8c9f8c992e5532b140123274ffc6e4171baa1b76f8")
        ensure_file_downloaded(
            colormodel_path,
            ["https://huggingface.co/spaces/Epoching/3D_Photo_Inpainting/resolve/e389e564fd2a55cf/checkpoints/color-model.pth",
             "https://filebox.ece.vt.edu/~jbhuang/project/3DPhoto/model/color-model.pth"],
            "383c9b1db70097907a6f9c8abb0303e7056f50d5456a36f34ab784592b8b2c20")

        print("Loading edge model ..")
        depth_edge_model = Inpaint_Edge_Net(init_weights=True)
        depth_edge_weight = torch.load(edgemodel_path, map_location=torch.device(device))
        depth_edge_model.load_state_dict(depth_edge_weight)
        depth_edge_model = depth_edge_model.to(device)
        depth_edge_model.eval()

        print("Loading depth model ..")
        depth_feat_model = Inpaint_Depth_Net()
        depth_feat_weight = torch.load(depthmodel_path, map_location=torch.device(device))
        depth_feat_model.load_state_dict(depth_feat_weight, strict=True)
        depth_feat_model = depth_feat_model.to(device)
        depth_feat_model.eval()

        print("Loading rgb model ..")
        rgb_model = Inpaint_Color_Net()
        rgb_feat_weight = torch.load(colormodel_path, map_location=torch.device(device))
        rgb_model.load_state_dict(rgb_feat_weight)
        rgb_model.eval()
        rgb_model = rgb_model.to(device)

        config = {}
        config["gpu_ids"] = 0
        config['extrapolation_thickness'] = 60
        config['extrapolate_border'] = True
        config['depth_threshold'] = 0.04
        config['redundant_number'] = 12
        config['ext_edge_threshold'] = 0.002
        config['background_thickness'] = 70
        config['context_thickness'] = 140
        config['background_thickness_2'] = 70
        config['context_thickness_2'] = 70
        config['log_depth'] = True
        config['depth_edge_dilate'] = 10
        config['depth_edge_dilate_2'] = 5
        config['largest_size'] = 512
        config['repeat_inpaint_edge'] = True
        config['ply_fmt'] = "bin"
        config['save_ply'] = backbone.get_opt('depthmap_script_save_ply', False)
        config['save_obj'] = True
        if device == torch.device("cpu"):
            config["gpu_ids"] = -1

        for count in trange(0, len(img_rgb)):
            basename = 'depthmap'
            if inputnames is not None and inputnames[count] is not None:
                p = Path(inputnames[count])
                basename = p.stem
            mesh_fi = get_uniquefn(outpath, basename, 'obj')

            print("\nGenerating inpainted mesh .. (go make some coffee) ..")

            # from inpaint.utils.get_MiDaS_samples
            W = img_rgb[count].width
            H = img_rgb[count].height
            int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32)
            if int_mtx.max() > 1:
                int_mtx[0, :] = int_mtx[0, :] / float(W)
                int_mtx[1, :] = int_mtx[1, :] / float(H)

            # how inpaint.utils.read_MiDaS_depth() imports the depthmap
            disp = img_depth[count].astype(np.float32)
            disp = disp - disp.min()
            disp = cv2.blur(disp / disp.max(), ksize=(3, 3)) * disp.max()
            disp = (disp / disp.max()) * 3.0
            depth = 1. / np.maximum(disp, 0.05)
            # rgb input
            img = np.asarray(img_rgb[count])
            if len(img.shape) > 2 and img.shape[2] == 4:
                # drop the alpha channel (for this purpose BGRA2BGR and RGBA2RGB are equivalent)
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

            # run sparse bilateral filter
            config['sparse_iter'] = 5
            config['filter_size'] = [7, 7, 5, 5, 5]
            config['sigma_s'] = 4.0
            config['sigma_r'] = 0.5
            vis_photos, vis_depths = sparse_bilateral_filtering(depth.copy(), img.copy(), config,
                                                                num_iter=config['sparse_iter'], spdb=False)
            depth = vis_depths[-1]

            # bilat_fn = os.path.join(outpath, basename + '_bilatdepth.png')
            # cv2.imwrite(bilat_fn, depth)

            rt_info = write_mesh(img, depth, int_mtx, mesh_fi, config, rgb_model,
                                 depth_edge_model, depth_edge_model, depth_feat_model)

            if rt_info is not False and gen_inpainted_mesh_demos:
                run_3dphoto_videos(mesh_fi, basename, outpath, 300, 40,
                                   [0.03, 0.03, 0.05, 0.03],
                                   ['double-straight-line', 'double-straight-line', 'circle', 'circle'],
                                   [0.00, 0.00, -0.015, -0.015],
                                   [0.00, 0.00, -0.015, -0.00],
                                   [-0.05, -0.05, -0.05, -0.05],
                                   ['dolly-zoom-in', 'zoom-in', 'circle', 'swing'],
                                   False, vid_format, vid_ssaa)

            backbone.torch_gc()
    finally:
        del rgb_model, depth_edge_model, depth_feat_model
        backbone.torch_gc()

    return mesh_fi


def run_3dphoto_videos(mesh_fi, basename, outpath, num_frames, fps, crop_border, traj_types, x_shift_range,
                       y_shift_range, z_shift_range, video_postfix, vid_dolly, vid_format, vid_ssaa):
    import vispy
    try:
        if platform.system() == 'Windows':
            vispy.use(app='PyQt5')
        elif platform.system() == 'Darwin':
            vispy.use('PyQt6')
        else:
            vispy.use(app='egl')
    except Exception:
        import traceback
        print(traceback.format_exc())
        print('Trying an alternative...')
        for u in ['PyQt5', 'PyQt6', 'egl']:
            try:
                vispy.use(app=u)
                break
            except Exception:
                print(f'Backend {u} failed:')
                print(traceback.format_exc())
        # Honestly, I don't know if it actually helps at all

    # read ply
    global video_mesh_data, video_mesh_fn
    if video_mesh_fn is None or video_mesh_fn != mesh_fi:
        try:
            del video_mesh_data
        except NameError:
            print("del video_mesh_data failed")
        video_mesh_fn = mesh_fi
        video_mesh_data = read_mesh(mesh_fi)

    verts, colors, faces, Height, Width, hFov, vFov, mean_loc_depth = video_mesh_data

    original_w = output_w = W = Width
    original_h = output_h = H = Height
    int_mtx = np.array([[max(H, W), 0, W // 2], [0, max(H, W), H // 2], [0, 0, 1]]).astype(np.float32)
    if int_mtx.max() > 1:
        int_mtx[0, :] = int_mtx[0, :] / float(W)
        int_mtx[1, :] = int_mtx[1, :] / float(H)

    config = {}
    config['video_folder'] = outpath
    config['num_frames'] = num_frames
    config['fps'] = fps
    config['crop_border'] = crop_border
    config['traj_types'] = traj_types
    config['x_shift_range'] = x_shift_range
    config['y_shift_range'] = y_shift_range
    config['z_shift_range'] = z_shift_range
    config['video_postfix'] = video_postfix
    config['ssaa'] = vid_ssaa

    # from inpaint.utils.get_MiDaS_samples
    generic_pose = np.eye(4)
    assert len(config['traj_types']) == len(config['x_shift_range']) == \
           len(config['y_shift_range']) == len(config['z_shift_range']) == len(config['video_postfix']), \
        "The number of elements in 'traj_types', 'x_shift_range', 'y_shift_range', " \
        "'z_shift_range' and 'video_postfix' should be equal."
    tgts_poses = []
    for traj_idx in range(len(config['traj_types'])):
        tgt_poses = []
        sx, sy, sz = path_planning(config['num_frames'], config['x_shift_range'][traj_idx],
                                   config['y_shift_range'][traj_idx], config['z_shift_range'][traj_idx],
                                   path_type=config['traj_types'][traj_idx])
        for xx, yy, zz in zip(sx, sy, sz):
            tgt_poses.append(generic_pose * 1.)
            tgt_poses[-1][:3, -1] = np.array([xx, yy, zz])
        tgts_poses += [tgt_poses]
    tgt_pose = generic_pose * 1

    # It seems we only need the depthmap to calculate mean_loc_depth, which is only used for 'dolly'.
    # Width and height are already stored in the ply file comments, and mean_loc_depth was added there too.
    # mean_loc_depth = img_depth[img_depth.shape[0]//2, img_depth.shape[1]//2]

    print("Generating videos ..")

    normal_canvas, all_canvas = None, None
    videos_poses, video_basename = copy.deepcopy(tgts_poses), basename
    top = (original_h // 2 - int_mtx[1, 2] * output_h)
    left = (original_w // 2 - int_mtx[0, 2] * output_w)
    down, right = top + output_h, left + output_w
    border = [int(xx) for xx in [top, down, left, right]]
    normal_canvas, all_canvas, fn_saved = output_3d_photo(
        verts.copy(), colors.copy(), faces.copy(),
        copy.deepcopy(Height), copy.deepcopy(Width),
        copy.deepcopy(hFov), copy.deepcopy(vFov),
        copy.deepcopy(tgt_pose), config['video_postfix'],
        copy.deepcopy(generic_pose), copy.deepcopy(config['video_folder']),
        None, copy.deepcopy(int_mtx), config, None,
        videos_poses, video_basename, original_h, original_w,
        border=border, depth=None, normal_canvas=normal_canvas, all_canvas=all_canvas,
        mean_loc_depth=mean_loc_depth, dolly=vid_dolly, fnExt=vid_format)
    return fn_saved
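
# A typical call of run_makevideo below (illustrative; the mesh filename is made up):
#   run_makevideo('outputs/depthmap-0001.obj', vid_numframes=300, vid_fps=40,
#                 vid_traj=2, vid_shift='-0.015, 0.0, -0.05',
#                 vid_border='0.03, 0.03, 0.05, 0.03', dolly=False,
#                 vid_format='mp4', vid_ssaa=2)
# renders a 300-frame, 40 fps 'circle' trajectory video into the default output directory.
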
def run_makevideo(fn_mesh, vid_numframes, vid_fps, vid_traj, vid_shift, vid_border, dolly, vid_format, vid_ssaa,
                  outpath=None, basename=None):
    if len(fn_mesh) == 0 or not os.path.exists(fn_mesh):
        raise Exception("Could not open mesh.")

    vid_ssaa = int(vid_ssaa)

    # traj type
    if vid_traj == 0:
        vid_traj = ['straight-line']
    elif vid_traj == 1:
        vid_traj = ['double-straight-line']
    elif vid_traj == 2:
        vid_traj = ['circle']

    num_fps = int(vid_fps)
    num_frames = int(vid_numframes)
    shifts = vid_shift.split(',')
    if len(shifts) != 3:
        raise Exception("Translate requires 3 elements.")
    x_shift_range = [float(shifts[0])]
    y_shift_range = [float(shifts[1])]
    z_shift_range = [float(shifts[2])]

    borders = vid_border.split(',')
    if len(borders) != 4:
        raise Exception("Crop Border requires 4 elements.")
    crop_border = [float(borders[0]), float(borders[1]), float(borders[2]), float(borders[3])]

    if not outpath:
        outpath = backbone.get_outpath()

    if not basename:
        # output path and filename mess ..
        basename = Path(fn_mesh).stem

    # unique filename
    basecount = backbone.get_next_sequence_number(outpath, basename)
    if basecount > 0:
        basecount = basecount - 1
    fullfn = None
    for i in range(500):
        fn = f"{basecount + i:05}" if basename == '' else f"{basename}-{basecount + i:04}"
        fullfn = os.path.join(outpath, f"{fn}_." + vid_format)
        if not os.path.exists(fullfn):
            break
    basename = Path(fullfn).stem
    basename = basename[:-1]  # drop the trailing underscore that was only needed for the uniqueness check

    print("Loading mesh ..")

    fn_saved = run_3dphoto_videos(fn_mesh, basename, outpath, num_frames, num_fps, crop_border, vid_traj,
                                  x_shift_range, y_shift_range, z_shift_range, [''], dolly, vid_format, vid_ssaa)

    return fn_saved[-1], fn_saved[-1], ''


def unload_models():
    model_holder.unload_models()


# TODO: code borrowed from the internet to be marked as such and to reside in separate files
def batched_background_removal(inimages, model_name):
    from rembg import new_session, remove
    print('creating background masks')
    outimages = []

    # model path and name
    bg_model_dir = Path.joinpath(Path().resolve(), "models/rem_bg")
    os.makedirs(bg_model_dir, exist_ok=True)
    os.environ["U2NET_HOME"] = str(bg_model_dir)

    # starting a session
    background_removal_session = new_session(model_name)
    for count in range(0, len(inimages)):
        bg_remove_img = np.array(remove(inimages[count], session=background_removal_session))
        outimages.append(Image.fromarray(bg_remove_img))
    # The line below might be redundant
    del background_removal_session
    return outimages


def pano_depth_to_world_points(depth):
    """
    360-degree depth to world points,
    given that the 2D depth is an equirectangular projection of a spherical image.
    Treat depth as radius.
    longitude: -pi to pi
    latitude: -pi/2 to pi/2
    """
    # Convert depth to radius
    radius = depth.flatten()

    lon = np.linspace(-np.pi, np.pi, depth.shape[1])
    lat = np.linspace(-np.pi / 2, np.pi / 2, depth.shape[0])

    lon, lat = np.meshgrid(lon, lat)
    lon = lon.flatten()
    lat = lat.flatten()

    # Convert to cartesian coordinates
    x = radius * np.cos(lat) * np.cos(lon)
    y = radius * np.cos(lat) * np.sin(lon)
    z = radius * np.sin(lat)

    pts3d = np.stack([x, y, z], axis=1)
    return pts3d


def depth_edges_mask(depth):
    """Returns a mask of edges in the depth map.
    Args:
        depth: 2D numpy array of shape (H, W) with dtype float32.
    Returns:
        mask: 2D numpy array of shape (H, W) with dtype bool.
    """
    # Compute the x and y gradients of the depth map.
    depth_dx, depth_dy = np.gradient(depth)
    # Compute the gradient magnitude.
    depth_grad = np.sqrt(depth_dx ** 2 + depth_dy ** 2)
    # Compute the edge mask.
    mask = depth_grad > 0.05
    return mask


def create_mesh(image, depth, keep_edges=False, spherical=False):
    import trimesh
    from dzoedepth.utils.geometry import depth_to_points, create_triangles
    maxsize = backbone.get_opt('depthmap_script_mesh_maxsize', 2048)

    # limit the size of the input image
    image.thumbnail((maxsize, maxsize))

    if not spherical:
        pts3d = depth_to_points(depth[None])
    else:
        pts3d = pano_depth_to_world_points(depth)
    verts = pts3d.reshape(-1, 3)

    image = np.array(image)
    if keep_edges:
        triangles = create_triangles(image.shape[0], image.shape[1])
    else:
        triangles = create_triangles(image.shape[0], image.shape[1], mask=~depth_edges_mask(depth))
    colors = image.reshape(-1, 3)
    mesh = trimesh.Trimesh(vertices=verts, faces=triangles, vertex_colors=colors)

    # rotate 90 degrees over X when spherical
    if spherical:
        angle = math.pi / 2
        direction = [1, 0, 0]
        center = [0, 0, 0]
        rot_matrix = trimesh.transformations.rotation_matrix(angle, direction, center)
        mesh.apply_transform(rot_matrix)

    return mesh
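
# A minimal end-to-end sketch (never called from here; assumes a configured backbone and
# relies on defaults for every option that is not passed in explicitly):
def _example_depthmap_for_one_image(image_path, output_dir):
    """Runs the funnel on a single image and saves the plain depthmap output.
    core_generation_funnel is a generator yielding (input_index, result_type, result) tuples."""
    image = Image.open(image_path).convert('RGB')
    inp = {go.DO_OUTPUT_DEPTH: True}  # every omitted GenerationOptions falls back to its default
    for count, result_type, result in core_generation_funnel(output_dir, [image], None, None, inp):
        if result_type == 'depth' and isinstance(result, Image.Image):
            result.save(os.path.join(output_dir, f'example-depth-{count}.png'))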