# Copyright (c) Meta Platforms, Inc. and affiliates. | |
# All rights reserved. | |
# | |
# This source code is licensed under the BSD-style license found in the | |
# LICENSE file in the root directory of this source tree. | |
import unittest | |
from itertools import product | |
from typing import Tuple | |
from unittest.mock import patch | |
import torch | |
from pytorch3d.common.compat import meshgrid_ij | |
from pytorch3d.implicitron.models.renderer.base import EvaluationMode | |
from pytorch3d.implicitron.models.renderer.ray_sampler import ( | |
AdaptiveRaySampler, | |
compute_radii, | |
NearFarRaySampler, | |
) | |
from pytorch3d.renderer.cameras import ( | |
CamerasBase, | |
FoVOrthographicCameras, | |
FoVPerspectiveCameras, | |
OrthographicCameras, | |
PerspectiveCameras, | |
) | |
from pytorch3d.renderer.implicit.utils import HeterogeneousRayBundle | |
from tests.common_camera_utils import init_random_cameras | |
from tests.common_testing import TestCaseMixin | |
# All camera classes exercised by the ray-sampler tests below; each test
# iterates over this tuple so both FoV and screen-space cameras are covered.
CAMERA_TYPES = (
    FoVPerspectiveCameras,
    FoVOrthographicCameras,
    OrthographicCameras,
    PerspectiveCameras,
)
def unproject_xy_grid_from_ndc_to_world_coord(
    cameras: CamerasBase, xy_grid: torch.Tensor
) -> torch.Tensor:
    """
    Unproject an xy_grid from NDC coordinates to world coordinates.

    Each (x, y) location is lifted onto the plane at depth 1 by appending
    a unit z coordinate before calling the cameras' unprojection.

    Args:
        cameras: CamerasBase used to unproject the points.
        xy_grid: A tensor of shape `(batch, ..., 2)` representing the
            x, y NDC coords.

    Returns:
        A tensor of shape `(batch, ..., 3)` holding the world-space
        coordinates of each grid point unprojected at depth 1.
    """
    batch_size = xy_grid.shape[0]
    # Number of rays per batch element: product of all dims between the
    # batch dim and the trailing coordinate dim.
    n_rays_per_image = xy_grid.shape[1:-1].numel()
    xy = xy_grid.view(batch_size, -1, 2)
    # Append z=1 so every point lies on the depth-1 plane.
    xyz = torch.cat([xy, xy_grid.new_ones(batch_size, n_rays_per_image, 1)], dim=-1)
    plane_at_depth1 = cameras.unproject_points(xyz, from_ndc=True)
    # Restore the caller's leading shape with a 3D coordinate per point.
    return plane_at_depth1.view(*xy_grid.shape[:-1], 3)
class TestRaysampler(TestCaseMixin, unittest.TestCase):
    """Tests for the implicitron NearFarRaySampler and AdaptiveRaySampler."""

    def test_ndc_raysampler_n_ray_total_is_none(self):
        # Guard test: the radii/lengths computation assumes the unpacked
        # (per-image) ray layout, so `n_rays_total` must remain unset on
        # both the training and the evaluation sub-samplers.
        sampler = NearFarRaySampler()
        message = (
            "If you introduce the support of `n_rays_total` for {0}, please handle the "
            "packing and unpacking logic for the radii and lengths computation."
        )
        self.assertIsNone(
            sampler._training_raysampler._n_rays_total, message.format(type(sampler))
        )
        self.assertIsNone(
            sampler._evaluation_raysampler._n_rays_total, message.format(type(sampler))
        )
        sampler = AdaptiveRaySampler()
        self.assertIsNone(
            sampler._training_raysampler._n_rays_total, message.format(type(sampler))
        )
        self.assertIsNone(
            sampler._evaluation_raysampler._n_rays_total, message.format(type(sampler))
        )

    def test_catch_heterogeneous_exception(self):
        # A HeterogeneousRayBundle cannot be cast as a cone: samplers built
        # with cast_ray_bundle_as_cone=True must raise TypeError on it,
        # while cast_ray_bundle_as_cone=False samplers accept it.
        cameras = init_random_cameras(FoVPerspectiveCameras, 1, random_z=True)

        class FakeSampler:
            # Minimal stand-in for NDCMultinomialRaysampler that always
            # returns a HeterogeneousRayBundle.
            def __init__(self):
                self.min_x, self.max_x = 1, 2
                self.min_y, self.max_y = 1, 2

            def __call__(self, **kwargs):
                return HeterogeneousRayBundle(
                    torch.rand(3), torch.rand(3), torch.rand(3), torch.rand(1)
                )

        with patch(
            "pytorch3d.implicitron.models.renderer.ray_sampler.NDCMultinomialRaysampler",
            return_value=FakeSampler(),
        ):
            for sampler in [
                AdaptiveRaySampler(cast_ray_bundle_as_cone=True),
                NearFarRaySampler(cast_ray_bundle_as_cone=True),
            ]:
                with self.assertRaises(TypeError):
                    _ = sampler(cameras, EvaluationMode.TRAINING)
            for sampler in [
                AdaptiveRaySampler(cast_ray_bundle_as_cone=False),
                NearFarRaySampler(cast_ray_bundle_as_cone=False),
            ]:
                _ = sampler(cameras, EvaluationMode.TRAINING)

    def test_compute_radii(self):
        # Compares compute_radii (which needs only the sampled xy points
        # plus the NDC pixel sizes) against the reference implementation
        # compute_pixel_radii_from_grid, which requires the full grid.
        batch_size = 1
        image_height, image_width = 20, 10
        min_y, max_y, min_x, max_x = -1.0, 1.0, -1.0, 1.0
        y, x = meshgrid_ij(
            torch.linspace(min_y, max_y, image_height, dtype=torch.float32),
            torch.linspace(min_x, max_x, image_width, dtype=torch.float32),
        )
        xy_grid = torch.stack([x, y], dim=-1).view(-1, 2)
        # NDC extent of one pixel along each axis.
        pixel_width = (max_x - min_x) / (image_width - 1)
        pixel_height = (max_y - min_y) / (image_height - 1)
        for cam_type in CAMERA_TYPES:
            # init a batch of random cameras
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            # This method allows us to compute the radii without having
            # access to the full grid. Raysamplers during the training
            # will sample random rays from the grid.
            radii = compute_radii(
                cameras, xy_grid, pixel_hw_ndc=(pixel_height, pixel_width)
            )
            plane_at_depth1 = unproject_xy_grid_from_ndc_to_world_coord(
                cameras, xy_grid
            )
            # This method absolutely needs the full grid to work.
            expected_radii = compute_pixel_radii_from_grid(
                plane_at_depth1.reshape(1, image_height, image_width, 3)
            )
            self.assertClose(expected_radii.reshape(-1, 1), radii)

    def test_forward(self):
        # With cast_ray_bundle_as_cone=False the bundle should carry no
        # bins/radii, and origins/directions/lengths should have the grid
        # shape in EVALUATION mode or the masked-ray shape in TRAINING.
        n_rays_per_image = 16
        image_height, image_width = 20, 20
        kwargs = {
            "image_width": image_width,
            "image_height": image_height,
            "n_pts_per_ray_training": 32,
            "n_pts_per_ray_evaluation": 32,
            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
            "cast_ray_bundle_as_cone": False,
        }
        batch_size = 2
        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]
        for cam_type, sampler, evaluation_mode in product(
            CAMERA_TYPES, samplers, evaluation_modes
        ):
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            ray_bundle = sampler(cameras, evaluation_mode)
            # Full image grid in evaluation, sampled rays during training.
            shape_out = (
                (batch_size, image_width, image_height)
                if evaluation_mode == EvaluationMode.EVALUATION
                else (batch_size, n_rays_per_image, 1)
            )
            n_pts_per_ray = (
                kwargs["n_pts_per_ray_evaluation"]
                if evaluation_mode == EvaluationMode.EVALUATION
                else kwargs["n_pts_per_ray_training"]
            )
            self.assertIsNone(ray_bundle.bins)
            self.assertIsNone(ray_bundle.pixel_radii_2d)
            self.assertEqual(
                ray_bundle.lengths.shape,
                (*shape_out, n_pts_per_ray),
            )
            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))

    def test_forward_with_use_bins(self):
        # With cast_ray_bundle_as_cone=True the bundle carries bins and
        # pixel radii; lengths must equal the bin midpoints.
        n_rays_per_image = 16
        image_height, image_width = 20, 20
        kwargs = {
            "image_width": image_width,
            "image_height": image_height,
            "n_pts_per_ray_training": 32,
            "n_pts_per_ray_evaluation": 32,
            "n_rays_per_image_sampled_from_mask": n_rays_per_image,
            "cast_ray_bundle_as_cone": True,
        }
        batch_size = 1
        samplers = [NearFarRaySampler(**kwargs), AdaptiveRaySampler(**kwargs)]
        evaluation_modes = [EvaluationMode.TRAINING, EvaluationMode.EVALUATION]
        for cam_type, sampler, evaluation_mode in product(
            CAMERA_TYPES, samplers, evaluation_modes
        ):
            cameras = init_random_cameras(cam_type, batch_size, random_z=True)
            ray_bundle = sampler(cameras, evaluation_mode)
            # Lengths are the midpoints of consecutive bin boundaries.
            lengths = 0.5 * (ray_bundle.bins[..., :-1] + ray_bundle.bins[..., 1:])
            self.assertClose(ray_bundle.lengths, lengths)
            shape_out = (
                (batch_size, image_width, image_height)
                if evaluation_mode == EvaluationMode.EVALUATION
                else (batch_size, n_rays_per_image, 1)
            )
            self.assertEqual(ray_bundle.pixel_radii_2d.shape, (*shape_out, 1))
            self.assertEqual(ray_bundle.directions.shape, (*shape_out, 3))
            self.assertEqual(ray_bundle.origins.shape, (*shape_out, 3))
# Reference helper used to validate compute_radii on a full grid.
def compute_pixel_radii_from_grid(pixel_grid: torch.Tensor) -> torch.Tensor:
    """
    Compute per-pixel conical-frustum radii from a full pixel grid.

    The displacement from each pixel to its neighbor along the x and y
    axes is computed first, then the Euclidean norm of each displacement.
    The radii follow as:

        (dx_norm + dy_norm) * 0.5 * 2 / 12**0.5

    where 2 / 12**0.5 is a scaling factor to match the variance of the
    pixel's footprint.

    Args:
        pixel_grid: A tensor of shape `(..., H, W, dim)` representing the
            full grid of rays pixel_grid.

    Returns:
        The radiis for each pixels and shape `(..., H, W, 1)`.
    """
    # Neighbor displacements: (..., H, W - 1, dim) and (..., H - 1, W, dim).
    x_shift = pixel_grid[..., :, 1:, :] - pixel_grid[..., :, :-1, :]
    y_shift = pixel_grid[..., 1:, :, :] - pixel_grid[..., :-1, :, :]
    # Per-pixel distances: (..., H, W - 1, 1) and (..., H - 1, W, 1).
    dx_norm = x_shift.norm(dim=-1, keepdim=True)
    dy_norm = y_shift.norm(dim=-1, keepdim=True)
    # Repeat the final column/row so the output covers every pixel:
    # both tensors become (..., H, W, 1).
    dx_norm = torch.cat([dx_norm, dx_norm[..., -1:, :]], dim=-2)
    dy_norm = torch.cat([dy_norm, dy_norm[..., -1:, :, :]], dim=-3)
    # (dx + dy) * 0.5 gives the base radius and * 2 / 12**0.5 rescales it;
    # the two factors cancel into a single division by sqrt(12).
    return (dx_norm + dy_norm) / 12**0.5
class TestRadiiComputationOnFullGrid(TestCaseMixin, unittest.TestCase):
    """Checks compute_pixel_radii_from_grid on a small hand-built grid."""

    def test_compute_pixel_radii_from_grid(self):
        # 3x3 grid lying in the z=0 plane: x spacings are (1, 2) and
        # y spacings are (0.25, 0.75).
        grid = torch.tensor(
            [
                [[0.0, 0.00, 0.0], [1.0, 0.00, 0.0], [3.0, 0.00, 0.0]],
                [[0.0, 0.25, 0.0], [1.0, 0.25, 0.0], [3.0, 0.25, 0.0]],
                [[0.0, 1.00, 0.0], [1.0, 1.00, 0.0], [3.0, 1.00, 0.0]],
            ]
        )
        # Row-to-row distances; the last row duplicates the previous one.
        y_norms = torch.tensor(3 * [0.25] + 6 * [0.75]).reshape(3, 3)
        # Column-to-column distances; the last column duplicates the 2nd.
        x_norms = torch.tensor(3 * [[1.0, 2.0, 2.0]])
        expected = (x_norms + y_norms) / 12**0.5
        actual = compute_pixel_radii_from_grid(grid)
        self.assertClose(actual, expected[..., None])