StyleNeRF / dnnlib /geometry.py
Jiatao Gu
add code from the original repo
94ada0b
raw
history blame
16.3 kB
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import torch
import torch.nn.functional as F
import math
import random
import numpy as np
def positional_encoding(p, size, pe='normal', use_pos=False):
if pe == 'gauss':
p_transformed = np.pi * p @ size
p_transformed = torch.cat(
[torch.sin(p_transformed), torch.cos(p_transformed)], dim=-1)
else:
p_transformed = torch.cat([torch.cat(
[torch.sin((2 ** i) * np.pi * p),
torch.cos((2 ** i) * np.pi * p)],
dim=-1) for i in range(size)], dim=-1)
if use_pos:
p_transformed = torch.cat([p_transformed, p], -1)
return p_transformed
def upsample(img_nerf, size, filter=None):
up = size // img_nerf.size(-1)
if up <= 1:
return img_nerf
if filter is not None:
from torch_utils.ops import upfirdn2d
for _ in range(int(math.log2(up))):
img_nerf = upfirdn2d.downsample2d(img_nerf, filter, up=2)
else:
img_nerf = F.interpolate(img_nerf, (size, size), mode='bilinear', align_corners=False)
return img_nerf
def downsample(img0, size, filter=None):
down = img0.size(-1) // size
if down <= 1:
return img0
if filter is not None:
from torch_utils.ops import upfirdn2d
for _ in range(int(math.log2(down))):
img0 = upfirdn2d.downsample2d(img0, filter, down=2)
else:
img0 = F.interpolate(img0, (size, size), mode='bilinear', align_corners=False)
return img0
def normalize_vecs(vectors: torch.Tensor) -> torch.Tensor:
"""
Normalize vector lengths.
"""
return vectors / (torch.norm(vectors, dim=-1, keepdim=True))
def repeat_vecs(vecs, n, dim=0):
return torch.stack(n*[vecs], dim=dim)
def get_grids(H, W, device, align=True):
ch = 1 if align else 1 - (1 / H)
cw = 1 if align else 1 - (1 / W)
x, y = torch.meshgrid(torch.linspace(-cw, cw, W, device=device),
torch.linspace(ch, -ch, H, device=device))
return torch.stack([x, y], -1)
def local_ensemble(pi, po, resolution):
ii = range(resolution)
ia = torch.tensor([max((i - 1)//2, 0) for i in ii]).long()
ib = torch.tensor([min((i + 1)//2, resolution//2-1) for i in ii]).long()
ul = torch.meshgrid(ia, ia)
ur = torch.meshgrid(ia, ib)
ll = torch.meshgrid(ib, ia)
lr = torch.meshgrid(ib, ib)
d_ul, p_ul = po - pi[ul], torch.stack(ul, -1)
d_ur, p_ur = po - pi[ur], torch.stack(ur, -1)
d_ll, p_ll = po - pi[ll], torch.stack(ll, -1)
d_lr, p_lr = po - pi[lr], torch.stack(lr, -1)
c_ul = d_ul.prod(dim=-1).abs()
c_ur = d_ur.prod(dim=-1).abs()
c_ll = d_ll.prod(dim=-1).abs()
c_lr = d_lr.prod(dim=-1).abs()
D = torch.stack([d_ul, d_ur, d_ll, d_lr], 0)
P = torch.stack([p_ul, p_ur, p_ll, p_lr], 0)
C = torch.stack([c_ul, c_ur, c_ll, c_lr], 0)
C = C / C.sum(dim=0, keepdim=True)
return D, P, C
def get_initial_rays_trig(num_steps, fov, resolution, ray_start, ray_end, device='cpu'):
"""Returns sample points, z_vals, ray directions in camera space."""
W, H = resolution
# Create full screen NDC (-1 to +1) coords [x, y, 0, 1].
# Y is flipped to follow image memory layouts.
x, y = torch.meshgrid(torch.linspace(-1, 1, W, device=device),
torch.linspace(1, -1, H, device=device))
x = x.T.flatten()
y = y.T.flatten()
z = -torch.ones_like(x, device=device) / math.tan((2 * math.pi * fov / 360)/2)
rays_d_cam = normalize_vecs(torch.stack([x, y, z], -1))
z_vals = torch.linspace(ray_start, ray_end, num_steps, device=device).reshape(1, num_steps, 1).repeat(W*H, 1, 1)
points = rays_d_cam.unsqueeze(1).repeat(1, num_steps, 1) * z_vals
return points, z_vals, rays_d_cam
def sample_camera_positions(
device, n=1, r=1, horizontal_stddev=1, vertical_stddev=1,
horizontal_mean=math.pi*0.5, vertical_mean=math.pi*0.5, mode='normal'):
"""
Samples n random locations along a sphere of radius r.
Uses a gaussian distribution for pitch and yaw
"""
if mode == 'uniform':
theta = (torch.rand((n, 1),device=device) - 0.5) * 2 * horizontal_stddev + horizontal_mean
phi = (torch.rand((n, 1),device=device) - 0.5) * 2 * vertical_stddev + vertical_mean
elif mode == 'normal' or mode == 'gaussian':
theta = torch.randn((n, 1), device=device) * horizontal_stddev + horizontal_mean
phi = torch.randn((n, 1), device=device) * vertical_stddev + vertical_mean
elif mode == 'hybrid':
if random.random() < 0.5:
theta = (torch.rand((n, 1),device=device) - 0.5) * 2 * horizontal_stddev * 2 + horizontal_mean
phi = (torch.rand((n, 1),device=device) - 0.5) * 2 * vertical_stddev * 2 + vertical_mean
else:
theta = torch.randn((n, 1), device=device) * horizontal_stddev + horizontal_mean
phi = torch.randn((n, 1), device=device) * vertical_stddev + vertical_mean
else:
phi = torch.ones((n, 1), device=device, dtype=torch.float) * vertical_mean
theta = torch.ones((n, 1), device=device, dtype=torch.float) * horizontal_mean
phi = torch.clamp(phi, 1e-5, math.pi - 1e-5)
output_points = torch.zeros((n, 3), device=device)# torch.cuda.FloatTensor(n, 3).fill_(0)#torch.zeros((n, 3))
output_points[:, 0:1] = r*torch.sin(phi) * torch.cos(theta)
output_points[:, 2:3] = r*torch.sin(phi) * torch.sin(theta)
output_points[:, 1:2] = r*torch.cos(phi)
return output_points, phi, theta
def perturb_points(points, z_vals, ray_directions, device):
distance_between_points = z_vals[:,:,1:2,:] - z_vals[:,:,0:1,:]
offset = (torch.rand(z_vals.shape, device=device)-0.5) * distance_between_points
z_vals = z_vals + offset
points = points + offset * ray_directions.unsqueeze(2)
return points, z_vals
def create_cam2world_matrix(forward_vector, origin, device=None):
"""Takes in the direction the camera is pointing and the camera origin and returns a world2cam matrix."""
forward_vector = normalize_vecs(forward_vector)
up_vector = torch.tensor([0, 1, 0], dtype=torch.float, device=device).expand_as(forward_vector)
left_vector = normalize_vecs(torch.cross(up_vector, forward_vector, dim=-1))
up_vector = normalize_vecs(torch.cross(forward_vector, left_vector, dim=-1))
rotation_matrix = torch.eye(4, device=device).unsqueeze(0).repeat(forward_vector.shape[0], 1, 1)
rotation_matrix[:, :3, :3] = torch.stack((-left_vector, up_vector, -forward_vector), axis=-1)
translation_matrix = torch.eye(4, device=device).unsqueeze(0).repeat(forward_vector.shape[0], 1, 1)
translation_matrix[:, :3, 3] = origin
cam2world = translation_matrix @ rotation_matrix
return cam2world
def transform_sampled_points(
points, z_vals, ray_directions, device,
h_stddev=1, v_stddev=1, h_mean=math.pi * 0.5,
v_mean=math.pi * 0.5, mode='normal'):
"""
points: batch_size x total_pixels x num_steps x 3
z_vals: batch_size x total_pixels x num_steps
"""
n, num_rays, num_steps, channels = points.shape
points, z_vals = perturb_points(points, z_vals, ray_directions, device)
camera_origin, pitch, yaw = sample_camera_positions(
n=points.shape[0], r=1,
horizontal_stddev=h_stddev, vertical_stddev=v_stddev,
horizontal_mean=h_mean, vertical_mean=v_mean,
device=device, mode=mode)
forward_vector = normalize_vecs(-camera_origin)
cam2world_matrix = create_cam2world_matrix(forward_vector, camera_origin, device=device)
points_homogeneous = torch.ones((points.shape[0], points.shape[1], points.shape[2], points.shape[3] + 1), device=device)
points_homogeneous[:, :, :, :3] = points
# should be n x 4 x 4 , n x r^2 x num_steps x 4
transformed_points = torch.bmm(cam2world_matrix, points_homogeneous.reshape(n, -1, 4).permute(0,2,1)).permute(0, 2, 1).reshape(n, num_rays, num_steps, 4)
transformed_ray_directions = torch.bmm(cam2world_matrix[..., :3, :3], ray_directions.reshape(n, -1, 3).permute(0,2,1)).permute(0, 2, 1).reshape(n, num_rays, 3)
homogeneous_origins = torch.zeros((n, 4, num_rays), device=device)
homogeneous_origins[:, 3, :] = 1
transformed_ray_origins = torch.bmm(cam2world_matrix, homogeneous_origins).permute(0, 2, 1).reshape(n, num_rays, 4)[..., :3]
return transformed_points[..., :3], z_vals, transformed_ray_directions, transformed_ray_origins, pitch, yaw
def integration(
rgb_sigma, z_vals, device, noise_std=0.5,
last_back=False, white_back=False, clamp_mode=None, fill_mode=None):
rgbs = rgb_sigma[..., :3]
sigmas = rgb_sigma[..., 3:]
deltas = z_vals[..., 1:, :] - z_vals[..., :-1, :]
delta_inf = 1e10 * torch.ones_like(deltas[..., :1, :])
deltas = torch.cat([deltas, delta_inf], -2)
if noise_std > 0:
noise = torch.randn(sigmas.shape, device=device) * noise_std
else:
noise = 0
if clamp_mode == 'softplus':
alphas = 1 - torch.exp(-deltas * (F.softplus(sigmas + noise)))
elif clamp_mode == 'relu':
alphas = 1 - torch.exp(-deltas * (F.relu(sigmas + noise)))
else:
raise "Need to choose clamp mode"
alphas_shifted = torch.cat([torch.ones_like(alphas[..., :1, :]), 1-alphas + 1e-10], -2)
weights = alphas * torch.cumprod(alphas_shifted, -2)[..., :-1, :]
weights_sum = weights.sum(-2)
if last_back:
weights[..., -1, :] += (1 - weights_sum)
rgb_final = torch.sum(weights * rgbs, -2)
depth_final = torch.sum(weights * z_vals, -2)
if white_back:
rgb_final = rgb_final + 1-weights_sum
if fill_mode == 'debug':
rgb_final[weights_sum.squeeze(-1) < 0.9] = torch.tensor([1., 0, 0], device=rgb_final.device)
elif fill_mode == 'weight':
rgb_final = weights_sum.expand_as(rgb_final)
return rgb_final, depth_final, weights
def get_sigma_field_np(nerf, styles, resolution=512, block_resolution=64):
# return numpy array of forwarded sigma value
bound = (nerf.depth_range[1] - nerf.depth_range[0]) * 0.5
X = torch.linspace(-bound, bound, resolution).split(block_resolution)
sigma_np = np.zeros([resolution, resolution, resolution], dtype=np.float32)
for xi, xs in enumerate(X):
for yi, ys in enumerate(X):
for zi, zs in enumerate(X):
xx, yy, zz = torch.meshgrid(xs, ys, zs)
pts = torch.stack([xx, yy, zz], dim=-1).unsqueeze(0).to(styles.device) # B, H, H, H, C
block_shape = [1, len(xs), len(ys), len(zs)]
feat_out, sigma_out = nerf.fg_nerf.forward_style2(pts, None, block_shape, ws=styles)
sigma_np[xi * block_resolution: xi * block_resolution + len(xs), \
yi * block_resolution: yi * block_resolution + len(ys), \
zi * block_resolution: zi * block_resolution + len(zs)] = sigma_out.reshape(block_shape[1:]).detach().cpu().numpy()
return sigma_np, bound
def extract_geometry(nerf, styles, resolution, threshold):
import mcubes
print('threshold: {}'.format(threshold))
u, bound = get_sigma_field_np(nerf, styles, resolution)
vertices, triangles = mcubes.marching_cubes(u, threshold)
b_min_np = np.array([-bound, -bound, -bound])
b_max_np = np.array([ bound, bound, bound])
vertices = vertices / (resolution - 1.0) * (b_max_np - b_min_np)[None, :] + b_min_np[None, :]
return vertices.astype('float32'), triangles
def render_mesh(meshes, camera_matrices, render_noise=True):
from pytorch3d.renderer import (
FoVPerspectiveCameras, look_at_view_transform,
RasterizationSettings, BlendParams,
MeshRenderer, MeshRasterizer, HardPhongShader, TexturesVertex
)
from pytorch3d.ops import interpolate_face_attributes
from pytorch3d.structures.meshes import Meshes
intrinsics, poses, _, _ = camera_matrices
device = poses.device
c2w = torch.matmul(poses, torch.diag(torch.tensor([-1.0, 1.0, -1.0, 1.0], device=device))[None, :, :]) # Different camera model...
w2c = torch.inverse(c2w)
R = c2w[:, :3, :3]
T = w2c[:, :3, 3] # So weird..... Why one is c2w and another is w2c?
focal = intrinsics[0, 0, 0]
fov = torch.arctan(focal) * 2.0 / np.pi * 180
colors = []
offset = 1
for res, (mesh, face_vert_noise) in meshes.items():
raster_settings = RasterizationSettings(
image_size=res,
blur_radius=0.0,
faces_per_pixel=1,
)
mesh = Meshes(
verts=[torch.from_numpy(mesh.vertices).float().to(device)],
faces=[torch.from_numpy(mesh.faces).long().to(device)])
_colors = []
for i in range(len(poses)):
cameras = FoVPerspectiveCameras(device=device, R=R[i: i+1], T=T[i: i+1], fov=fov)
rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)
pix_to_face, zbuf, bary_coord, dists = rasterizer(mesh)
color = interpolate_face_attributes(pix_to_face, bary_coord, face_vert_noise).squeeze()
# hack
color[offset:, offset:] = color[:-offset, :-offset]
_colors += [color]
color = torch.stack(_colors, 0).permute(0,3,1,2)
colors += [color]
offset *= 2
return colors
def rotate_vects(v, theta):
theta = theta / math.pi * 2
theta = theta + (theta < 0).type_as(theta) * 4
v = v.reshape(v.size(0), v.size(1) // 4, 4, v.size(2), v.size(3))
vs = []
order = [0,2,3,1] # Not working
iorder = [0,3,1,2] # Not working
for b in range(len(v)):
if (theta[b] - 0) < 1e-6:
u, l = 0, 0
elif (theta[b] - 1) < 1e-6:
u, l = 0, 1
elif (theta[b] - 2) < 1e-6:
u, l = 0, 2
elif (theta[b] - 3) < 1e-6:
u, l = 0, 3
else:
u, l = math.modf(theta[b])
l, r = int(l), int(l + 1) % 4
vv = v[b, :, order] # 0 -> 1 -> 3 -> 2
vl = torch.cat([vv[:, l:], vv[:, :l]], 1)
if u > 0:
vr = torch.cat([vv[:, r:], vv[:, :r]], 1)
vv = vl * (1-u) + vr * u
else:
vv = vl
vs.append(vv[:, iorder])
v = torch.stack(vs, 0)
v = v.reshape(v.size(0), -1, v.size(-2), v.size(-1))
return v
def generate_option_outputs(render_option):
# output debugging outputs (not used in normal rendering process)
if ('depth' in render_option.split(',')):
img = camera_world[:, :1] + fg_depth_map * ray_vector
img = reformat(img, tgt_res)
if 'gradient' in render_option.split(','):
points = (camera_world[:,:,None]+di[:,:,:,None]*ray_vector[:,:,None]).reshape(
batch_size, tgt_res, tgt_res, di.size(-1), 3)
with torch.enable_grad():
gradients = self.fg_nerf.forward_style2(
points, None, [batch_size, tgt_res, di.size(-1), tgt_res], get_normal=True,
ws=styles, z_shape=z_shape_obj, z_app=z_app_obj).reshape(
batch_size, di.size(-1), 3, tgt_res * tgt_res).permute(0,3,1,2)
avg_grads = (gradients * fg_weights.unsqueeze(-1)).sum(-2)
normal = reformat(normalize(avg_grads, axis=2)[0], tgt_res)
img = normal
if 'value' in render_option.split(','):
fg_feat = fg_feat[:,:,3:].norm(dim=-1,keepdim=True)
img = reformat(fg_feat.repeat(1,1,3), tgt_res) / fg_feat.max() * 2 - 1
if 'opacity' in render_option.split(','):
opacity = bg_lambda.unsqueeze(-1).repeat(1,1,3) * 2 - 1
img = reformat(opacity, tgt_res)
if 'normal' in render_option.split(','):
shift_l, shift_r = img[:,:,2:,:], img[:,:,:-2,:]
shift_u, shift_d = img[:,:,:,2:], img[:,:,:,:-2]
diff_hor = normalize(shift_r - shift_l, axis=1)[0][:, :, :, 1:-1]
diff_ver = normalize(shift_u - shift_d, axis=1)[0][:, :, 1:-1, :]
normal = torch.cross(diff_hor, diff_ver, dim=1)
img = normalize(normal, axis=1)[0]
return {'full_out': (None, img), 'reg_loss': {}}