# VistaDream — ops/gs/basic.py (upstream commit fd5e0f7, author: hpwang)
import PIL
import torch
import numpy as np
import gsplat as gs
import torch.nn as nn
from copy import deepcopy
import torch.nn.functional as F
from dataclasses import dataclass
from ops.utils import (
dpt2xyz,
alpha_inpaint_mask,
transform_points,
numpy_normalize,
numpy_quaternion_from_matrix
)
@dataclass
class Frame():
    '''
    A single 2D frame of the scene.

    rgb: in shape of H*W*3, in range of 0-1
    dpt: in shape of H*W, real depth
    sky: sky mask in shape of H*W
    inpaint: bool mask in shape of H*W for inpainting
    intrinsic: 3*3 camera matrix
    extrinsic: array in shape of 4*4 (world-to-camera)
    As a class for:
        initialize camera
        accept rendering result
        accept inpainting result
    All at 2D-domain
    '''
    def __init__(self,
                 H: int = None,
                 W: int = None,
                 rgb: np.ndarray = None,
                 dpt: np.ndarray = None,
                 sky: np.ndarray = None,
                 inpaint: np.ndarray = None,
                 intrinsic: np.ndarray = None,
                 extrinsic: np.ndarray = None,
                 # detailed target
                 ideal_dpt: np.ndarray = None,
                 ideal_nml: np.ndarray = None,
                 prompt: str = None) -> None:
        self.H = H
        self.W = W
        self.rgb = rgb
        self.dpt = dpt
        self.sky = sky
        self.prompt = prompt
        self.intrinsic = intrinsic
        self.extrinsic = extrinsic
        self._rgb_rect()
        self._extr_rect()
        # for inpainting
        self.inpaint = inpaint
        self.inpaint_wo_edge = inpaint
        # for supervision
        self.ideal_dpt = ideal_dpt
        self.ideal_nml = ideal_nml

    def _rgb_rect(self):
        '''Normalize self.rgb to a float ndarray in the 0-1 range.

        Generalized from the original PNG/JPEG-only isinstance checks: any
        non-ndarray input (a PIL image of any format, a nested list, ...) is
        converted with np.array first. This also avoids the AttributeError
        the original could raise when PIL's PngImagePlugin/JpegImagePlugin
        submodules were not imported.
        '''
        if self.rgb is not None:
            if not isinstance(self.rgb, np.ndarray):
                self.rgb = np.array(self.rgb)
            # values above ~1 mean a 0-255 image; rescale into 0-1
            if np.amax(self.rgb) > 1.1:
                self.rgb = self.rgb / 255

    def _extr_rect(self):
        '''Default the extrinsic to identity and cache its inverse (camera-to-world).'''
        if self.extrinsic is None:
            self.extrinsic = np.eye(4)
        self.inv_extrinsic = np.linalg.inv(self.extrinsic)
@dataclass
class Gaussian_Frame():
    '''
    In-frame-frustrum Gaussians from a single RGBD frame.
    As a class for:
        accept information from initialized/inpainting+geo-estimated frame
        saving pixelsplat properties including rgb, xyz, scale, rotation, opacity
    Note on xyz: we first project depth to xyz; a scale map (initialized to
    ones) and a shift map (initialized to zeros) are then optimized and applied
    to the original xyz when rendering.
    '''
    # as pixelsplat gaussian
    # NOTE(review): the original definitions ended each line with a stray
    # comma, silently turning every default into a 1-tuple such as (None,)
    # or (480,); the commas are removed so defaults are the intended values.
    rgb: torch.Tensor = None
    scale: torch.Tensor = None
    opacity: torch.Tensor = None
    rotation: torch.Tensor = None
    # gaussian center
    dpt: torch.Tensor = None
    xyz: torch.Tensor = None
    # as a frame
    H: int = 480
    W: int = 640

    def __init__(self, frame: 'Frame', device='cuda'):
        '''Build Gaussians from a complete (init or inpainted) frame.'''
        # de-activation functions: inverses of the activations applied at render time
        self.rgbs_deact = torch.logit
        self.scales_deact = torch.log
        self.opacity_deact = torch.logit
        self.device = device
        # for gaussian initialization
        self._set_property_from_frame(frame)

    def _to_3d(self):
        '''Unproject self.dpt to world-space xyz using the inverse intrinsic/extrinsic.'''
        xyz = dpt2xyz(self.dpt, self.intrinsic)
        inv_extrinsic = np.linalg.inv(self.extrinsic)
        xyz = transform_points(xyz, inv_extrinsic)
        return xyz

    def _paint_filter(self, paint_mask):
        '''Keep only the gaussians under paint_mask (H*W).

        When fewer than 3 pixels are marked, fall back to keeping the first
        pixel row so at least some gaussians survive.
        '''
        if np.sum(paint_mask) < 3:
            paint_mask = np.zeros((self.H, self.W))
            paint_mask[0:1] = 1
        paint_mask = paint_mask > .5
        self.rgb = self.rgb[paint_mask]
        self.xyz = self.xyz[paint_mask]
        self.scale = self.scale[paint_mask]
        self.opacity = self.opacity[paint_mask]
        self.rotation = self.rotation[paint_mask]

    def _to_cuda(self):
        '''Move all gaussian properties to self.device as float32 tensors.'''
        self.rgb = torch.from_numpy(self.rgb.astype(np.float32)).to(self.device)
        self.xyz = torch.from_numpy(self.xyz.astype(np.float32)).to(self.device)
        self.scale = torch.from_numpy(self.scale.astype(np.float32)).to(self.device)
        self.opacity = torch.from_numpy(self.opacity.astype(np.float32)).to(self.device)
        self.rotation = torch.from_numpy(self.rotation.astype(np.float32)).to(self.device)

    def _fine_init_scale_rotations(self):
        # from https://arxiv.org/pdf/2406.09394
        """Compute rotation matrices that align the z-axis with given normal
        vectors, and normal-dependent anisotropic scales.

        NOTE(review): self.nml is never assigned anywhere in this class
        (presumably it should come from the frame's ideal_nml) — confirm
        before calling this method; only _coarse_init_scale_rotations is
        currently used by _set_property_from_frame.
        """
        up_axis = np.array([0, 1, 0])
        # rotate normals into the camera frame
        nml = self.nml @ self.extrinsic[0:3, 0:3]
        qz = numpy_normalize(nml)
        qx = np.cross(up_axis, qz)
        qx = numpy_normalize(qx)
        qy = np.cross(qz, qx)
        qy = numpy_normalize(qy)
        rot = np.concatenate([qx[..., None], qy[..., None], qz[..., None]], axis=-1)
        self.rotation = numpy_quaternion_from_matrix(rot)
        # scale: clamp near-tangent normals to keep the projected scales bounded
        safe_nml = deepcopy(self.nml)
        safe_nml[safe_nml[:, :, -1] < 0.2, -1] = .2
        normal_xoz = deepcopy(safe_nml)
        normal_yoz = deepcopy(safe_nml)
        normal_xoz[..., 1] = 0.
        normal_yoz[..., 0] = 0.
        normal_xoz = numpy_normalize(normal_xoz)
        normal_yoz = numpy_normalize(normal_yoz)
        cos_theta_x = np.abs(normal_xoz[..., 2])
        cos_theta_y = np.abs(normal_yoz[..., 2])
        # base scale: one-pixel footprint at the gaussian's depth
        scale_basic = self.dpt / self.intrinsic[0, 0] / np.sqrt(2)
        scale_x = scale_basic / cos_theta_x
        scale_y = scale_basic / cos_theta_y
        # thin along the normal direction
        scale_z = (scale_x + scale_y) / 10.
        self.scale = np.concatenate([scale_x[..., None],
                                     scale_y[..., None],
                                     scale_z[..., None]], axis=-1)

    def _coarse_init_scale_rotations(self):
        '''Isotropic init: H*W*3 scale from depth, identity H*W*4 quaternions.'''
        # gaussian property -- HW3 scale (one-pixel footprint at each depth)
        self.scale = self.dpt / self.intrinsic[0, 0] / np.sqrt(2)
        self.scale = self.scale[:, :, None].repeat(3, -1)
        # gaussian property -- HW4 rotation (w=1, i.e. identity quaternion)
        self.rotation = np.zeros((self.H, self.W, 4))
        self.rotation[:, :, 0] = 1.

    def _set_property_from_frame(self, frame: 'Frame'):
        '''frame here is a complete init/inpainted frame'''
        # basic frame-level property
        self.H = frame.H
        self.W = frame.W
        self.dpt = frame.dpt
        self.intrinsic = frame.intrinsic
        self.extrinsic = frame.extrinsic
        # gaussian property -- xyz with train-able pixel-aligned scale and shift
        self.xyz = self._to_3d()
        # gaussian property -- HW3 rgb
        self.rgb = frame.rgb
        # gaussian property -- HW4 rotation HW3 scale
        self._coarse_init_scale_rotations()
        # gaussian property -- HW opacity (0.8 before logit de-activation)
        self.opacity = np.ones((self.H, self.W, 1)) * 0.8
        # keep only the newly inpainted pixels, then move to device
        self._paint_filter(frame.inpaint_wo_edge)
        self._to_cuda()
        # de-activate: store values in unconstrained space (logit/log)
        self.rgb = self.rgbs_deact(self.rgb)
        self.scale = self.scales_deact(self.scale)
        self.opacity = self.opacity_deact(self.opacity)
        # to torch parameters (frozen until _require_grad is called)
        self.rgb = nn.Parameter(self.rgb, requires_grad=False)
        self.xyz = nn.Parameter(self.xyz, requires_grad=False)
        self.scale = nn.Parameter(self.scale, requires_grad=False)
        self.opacity = nn.Parameter(self.opacity, requires_grad=False)
        self.rotation = nn.Parameter(self.rotation, requires_grad=False)

    def _require_grad(self, sign=True):
        '''Toggle gradient tracking on all gaussian parameters.'''
        self.rgb = self.rgb.requires_grad_(sign)
        self.xyz = self.xyz.requires_grad_(sign)
        self.scale = self.scale.requires_grad_(sign)
        self.opacity = self.opacity.requires_grad_(sign)
        self.rotation = self.rotation.requires_grad_(sign)
class Gaussian_Scene():
def __init__(self,cfg=None):
# frames initialing the frame
self.frames = []
self.gaussian_frames: list[Gaussian_Frame] = [] # gaussian frame require training at this optimization
# activate fuctions
self.rgbs_act = torch.sigmoid
self.scales_act = torch.exp
self.opacity_act = torch.sigmoid
self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
# for traj generation
self.traj_type = 'spiral'
if cfg is not None:
self.traj_min_percentage = cfg.scene.traj.near_percentage
self.traj_max_percentage = cfg.scene.traj.far_percentage
self.traj_forward_ratio = cfg.scene.traj.traj_forward_ratio
self.traj_backward_ratio = cfg.scene.traj.traj_backward_ratio
else:
self.traj_min_percentage,self.traj_max_percentage,self.traj_forward_ratio,self.traj_backward_ratio = 5, 50, 0.3, 0.4
# basic operations
def _render_RGBD(self,frame,background_color='black'):
'''
:intinsic: tensor of [fu,fv,cu,cv] 4-dimension
:extinsic: tensor 4*4-dimension
:out: tensor H*W*3-dimension
'''
background = None
if background_color =='white':
background = torch.ones(1,4,device=self.device)*0.1
background[:,-1] = 0. # for depth
# aligned untrainable xyz and unaligned trainable xyz
# others
xyz = torch.cat([gf.xyz.reshape(-1,3) for gf in self.gaussian_frames],dim=0)
rgb = torch.cat([gf.rgb.reshape(-1,3) for gf in self.gaussian_frames],dim=0)
scale = torch.cat([gf.scale.reshape(-1,3) for gf in self.gaussian_frames],dim=0)
opacity = torch.cat([gf.opacity.reshape(-1) for gf in self.gaussian_frames],dim=0)
rotation = torch.cat([gf.rotation.reshape(-1,4) for gf in self.gaussian_frames],dim=0)
# activate
rgb = self.rgbs_act(rgb)
scale = self.scales_act(scale)
rotation = F.normalize(rotation,dim=1)
opacity = self.opacity_act(opacity)
# property
H,W = frame.H, frame.W
intrinsic = torch.from_numpy(frame.intrinsic.astype(np.float32)).to(self.device)
extrinsic = torch.from_numpy(frame.extrinsic.astype(np.float32)).to(self.device)
# render
render_out,render_alpha,_ = gs.rendering.rasterization(means = xyz,
scales = scale,
quats = rotation,
opacities = opacity,
colors = rgb,
Ks = intrinsic[None],
viewmats = extrinsic[None],
width = W,
height = H,
packed = False,
near_plane= 0.01,
render_mode="RGB+ED",
backgrounds=background) # render: 1*H*W*(3+1)
render_out = render_out.squeeze() # result: H*W*(3+1)
render_rgb = render_out[:,:,0:3]
render_dpt = render_out[:,:,-1]
return render_rgb, render_dpt, render_alpha
@torch.no_grad()
def _render_for_inpaint(self,frame):
# first render
render_rgb, render_dpt, render_alpha = self._render_RGBD(frame)
render_msk = alpha_inpaint_mask(render_alpha)
# to numpy
render_rgb = render_rgb.detach().cpu().numpy()
render_dpt = render_dpt.detach().cpu().numpy()
render_alpha = render_alpha.detach().cpu().numpy()
# assign back
frame.rgb = render_rgb
frame.dpt = render_dpt
frame.inpaint = render_msk
return frame
def _add_trainable_frame(self,frame:Frame,require_grad=True):
# for the init frame, we keep all pixels for finetuning
self.frames.append(frame)
gf = Gaussian_Frame(frame, self.device)
gf._require_grad(require_grad)
self.gaussian_frames.append(gf)