File size: 3,137 Bytes
aaa261a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# Copied from ImageDream
# https://github.com/bytedance/ImageDream/blob/main/extern/ImageDream/imagedream/camera_utils.py
import numpy as np
import torch
def create_camera_to_world_matrix(elevation, azimuth):
    """Return a 4x4 camera-to-world matrix for a camera aimed at the origin.

    The camera sits on the unit sphere at
    ``(cos(e) * sin(a), sin(e), cos(e) * cos(a))`` and looks at ``(0, 0, 0)``
    with ``(0, 1, 0)`` as the world up direction.

    Args:
        elevation: elevation angle in degrees.
        azimuth: azimuth angle in degrees.

    Returns:
        A ``(4, 4)`` numpy array whose rotation columns are the camera's
        right, up and backward (negated viewing) directions and whose last
        column holds the camera position.
    """
    elev_rad = np.radians(elevation)
    azim_rad = np.radians(azimuth)

    # Spherical -> Cartesian position on the unit sphere
    cam_center = np.array(
        [
            np.cos(elev_rad) * np.sin(azim_rad),
            np.sin(elev_rad),
            np.cos(elev_rad) * np.cos(azim_rad),
        ]
    )
    look_at = np.array([0, 0, 0])
    world_up = np.array([0, 1, 0])

    def _unit(vec):
        # Scale a vector to unit length
        return vec / np.linalg.norm(vec)

    # Orthonormal look-at basis
    view_dir = _unit(look_at - cam_center)
    side = _unit(np.cross(view_dir, world_up))
    cam_up = _unit(np.cross(side, view_dir))

    pose = np.eye(4)
    pose[:3, 0] = side
    pose[:3, 1] = cam_up
    pose[:3, 2] = -view_dir
    pose[:3, 3] = cam_center
    return pose
def convert_opengl_to_blender(camera_matrix):
    """Convert camera matrices from OpenGL axes to Blender axes.

    The input is left-multiplied by a fixed 4x4 matrix that maps each
    column ``(x, y, z, w)`` to ``(x, -z, y, w)``.

    Args:
        camera_matrix: a ``np.ndarray`` or ``torch.Tensor`` whose trailing
            dimensions are 4x4. A leading batch dimension, e.g.
            ``(N, 4, 4)``, is accepted for both container types.

    Returns:
        The transformed matrix (or batch of matrices) in the same container
        type and with the same shape as the input.
    """
    flip_yz = [[1, 0, 0, 0], [0, 0, -1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]
    if isinstance(camera_matrix, np.ndarray):
        # matmul broadcasts over leading batch dimensions; np.dot would
        # instead return shape (4, N, 4) for an (N, 4, 4) input.
        return np.matmul(np.array(flip_yz), camera_matrix)
    # .to(camera_matrix) matches the input's dtype and device; torch.matmul
    # broadcasts the 4x4 matrix over any batch dimension.
    return torch.matmul(torch.tensor(flip_yz).to(camera_matrix), camera_matrix)
def normalize_camera(camera_matrix):
    """Normalize the camera location onto a unit sphere.

    The input is viewed as a batch of 4x4 matrices and the translation
    column of each one is divided by ``(its Euclidean norm + 1e-8)``. The
    small epsilon keeps an all-zero translation at zero instead of dividing
    by zero.

    Args:
        camera_matrix: a ``np.ndarray`` or ``torch.Tensor`` that can be
            reshaped to ``(-1, 4, 4)`` (e.g. ``(4, 4)``, ``(N, 4, 4)`` or
            ``(N, 16)``).

    Returns:
        A new array/tensor of shape ``(N, 16)``. The input is not modified.
    """
    if isinstance(camera_matrix, np.ndarray):
        # reshape may return a view of the caller's buffer, so copy before
        # writing the rescaled translations back.
        cams = camera_matrix.reshape(-1, 4, 4).copy()
        norms = np.linalg.norm(cams[:, :3, 3], axis=1, keepdims=True)
    else:
        # clone() likewise detaches storage from the caller's tensor.
        cams = camera_matrix.reshape(-1, 4, 4).clone()
        norms = torch.norm(cams[:, :3, 3], dim=1, keepdim=True)
    cams[:, :3, 3] = cams[:, :3, 3] / (norms + 1e-8)
    return cams.reshape(-1, 16)
def get_camera(
    num_frames,
    elevation=15,
    azimuth_start=0,
    azimuth_span=360,
    blender_coord=True,
    extra_view=False,
):
    """Build flattened camera-to-world matrices for evenly spaced azimuths.

    Args:
        num_frames: number of views to generate; must be at least 1.
        elevation: elevation angle in degrees shared by every view.
        azimuth_start: azimuth in degrees of the first view.
        azimuth_span: total azimuth range in degrees; view ``i`` is placed at
            ``azimuth_start + i * azimuth_span / num_frames``.
        blender_coord: when true, each matrix is passed through
            ``convert_opengl_to_blender``.
        extra_view: when true, one additional all-zero row is appended.

    Returns:
        A float32 ``torch.Tensor`` of shape ``(num_frames, 16)``, or
        ``(num_frames + 1, 16)`` when ``extra_view`` is set.

    Raises:
        ValueError: if ``num_frames`` is less than 1.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be at least 1, got {num_frames}")

    angle_gap = azimuth_span / num_frames
    # Index-based spacing yields exactly num_frames angles; np.arange with a
    # float step can produce one element too many or too few due to rounding.
    azimuths = azimuth_start + angle_gap * np.arange(num_frames)

    cameras = []
    for azimuth in azimuths:
        camera_matrix = create_camera_to_world_matrix(elevation, azimuth)
        if blender_coord:
            camera_matrix = convert_opengl_to_blender(camera_matrix)
        cameras.append(camera_matrix.flatten())

    if extra_view:
        # Placeholder row with the same width as a flattened camera matrix
        cameras.append(np.zeros(len(cameras[0])))

    return torch.tensor(np.stack(cameras, 0)).float()
|