import contextlib
import gzip
import os
import unittest
from typing import List

import numpy as np
import torch

from pytorch3d.implicitron.dataset import types
from pytorch3d.implicitron.dataset.dataset_base import FrameData
from pytorch3d.implicitron.dataset.frame_data import FrameDataBuilder
from pytorch3d.implicitron.dataset.utils import (
    get_bbox_from_mask,
    load_16big_png_depth,
    load_1bit_png_mask,
    load_depth,
    load_depth_mask,
    load_image,
    load_mask,
    safe_as_tensor,
    transpose_normalize_image,
)
from pytorch3d.implicitron.tools.config import get_default_args
from pytorch3d.renderer.cameras import PerspectiveCameras

from tests.common_testing import TestCaseMixin
from tests.implicitron.common_resources import get_skateboard_data


class TestFrameDataBuilder(TestCaseMixin, unittest.TestCase):
    def setUp(self):
        torch.manual_seed(42)

        category = "skateboard"
        stack = contextlib.ExitStack()
        self.dataset_root, self.path_manager = stack.enter_context(
            get_skateboard_data()
        )
        self.addCleanup(stack.close)
        self.image_height = 768
        self.image_width = 512

        self.frame_data_builder = FrameDataBuilder(
            image_height=self.image_height,
            image_width=self.image_width,
            dataset_root=self.dataset_root,
            path_manager=self.path_manager,
        )
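
        # The builder is configured to crop/resize frames to
        # image_height x image_width; the shape assertions in
        # test_load_and_adjust_frame_data check against these values.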

        frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz")
        local_file = self.path_manager.get_local_path(frame_file)
        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
            frame_annots_list = types.load_dataclass(
                zipfile, List[types.FrameAnnotation]
            )
        self.frame_annotation = frame_annots_list[0]

        sequence_annotations_file = os.path.join(
            self.dataset_root, category, "sequence_annotations.jgz"
        )
        local_file = self.path_manager.get_local_path(sequence_annotations_file)
        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
            seq_annots_list = types.load_dataclass(
                zipfile, List[types.SequenceAnnotation]
            )
        seq_annots = {entry.sequence_name: entry for entry in seq_annots_list}
        self.seq_annotation = seq_annots[self.frame_annotation.sequence_name]
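
        # Assemble a minimal FrameData from the raw annotations; image, mask,
        # depth and camera fields are filled in by the individual tests below.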
        point_cloud = self.seq_annotation.point_cloud
        self.frame_data = FrameData(
            frame_number=safe_as_tensor(self.frame_annotation.frame_number, torch.long),
            frame_timestamp=safe_as_tensor(
                self.frame_annotation.frame_timestamp, torch.float
            ),
            sequence_name=self.frame_annotation.sequence_name,
            sequence_category=self.seq_annotation.category,
            camera_quality_score=safe_as_tensor(
                self.seq_annotation.viewpoint_quality_score, torch.float
            ),
            point_cloud_quality_score=safe_as_tensor(
                point_cloud.quality_score, torch.float
            )
            if point_cloud is not None
            else None,
        )

    def test_frame_data_builder_args(self):
        # Check that the default arguments of FrameDataBuilder can be
        # expanded into a valid default config.
        get_default_args(FrameDataBuilder)

    def test_fix_point_cloud_path(self):
        """Some files in Co3Dv2 have an accidental absolute path stored."""
        original_path = "some_file_path"
        modified_path = self.frame_data_builder._fix_point_cloud_path(original_path)
        self.assertIn(original_path, modified_path)
        self.assertIn(self.frame_data_builder.dataset_root, modified_path)

    def test_load_and_adjust_frame_data(self):
        self.frame_data.image_size_hw = safe_as_tensor(
            self.frame_annotation.image.size, torch.long
        )
        self.frame_data.effective_image_size_hw = self.frame_data.image_size_hw
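
        # Load the foreground probability mask and derive the object
        # bounding box from it using the builder's mask threshold.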
        fg_mask_np, mask_path = self.frame_data_builder._load_fg_probability(
            self.frame_annotation
        )
        self.frame_data.mask_path = mask_path
        self.frame_data.fg_probability = safe_as_tensor(fg_mask_np, torch.float)
        mask_thr = self.frame_data_builder.box_crop_mask_thr
        bbox_xywh = get_bbox_from_mask(fg_mask_np, mask_thr)
        self.frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.long)

        self.assertIsNotNone(self.frame_data.mask_path)
        self.assertTrue(torch.is_tensor(self.frame_data.fg_probability))
        self.assertTrue(torch.is_tensor(self.frame_data.bbox_xywh))

        self.assertEqual(self.frame_data.bbox_xywh.shape, torch.Size([4]))
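
        # Load the RGB image and post-process it into a 3 x H x W float
        # tensor.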
        image_path = os.path.join(
            self.frame_data_builder.dataset_root, self.frame_annotation.image.path
        )
        image_np = load_image(self.frame_data_builder._local_path(image_path))
        self.assertIsInstance(image_np, np.ndarray)
        self.frame_data.image_rgb = self.frame_data_builder._postprocess_image(
            image_np, self.frame_annotation.image.size, self.frame_data.fg_probability
        )
        self.assertIsInstance(self.frame_data.image_rgb, torch.Tensor)
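
        # Load the depth map together with its validity mask.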
        (
            self.frame_data.depth_map,
            depth_path,
            self.frame_data.depth_mask,
        ) = self.frame_data_builder._load_mask_depth(
            self.frame_annotation,
            self.frame_data.fg_probability,
        )
        self.assertTrue(torch.is_tensor(self.frame_data.depth_map))
        self.assertIsNotNone(depth_path)
        self.assertTrue(torch.is_tensor(self.frame_data.depth_mask))
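
        # Crop around the annotated bounding box (when box cropping is
        # enabled) and resize all image-like fields to the target size.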
        new_size = (self.image_height, self.image_width)

        if self.frame_data_builder.box_crop:
            self.frame_data.crop_by_metadata_bbox_(
                self.frame_data_builder.box_crop_context,
            )

        self.frame_data.resize_frame_(
            new_size_hw=torch.tensor(new_size, dtype=torch.long),
        )

        self.assertEqual(
            self.frame_data.image_rgb.shape,
            torch.Size([3, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.mask_crop.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.fg_probability.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.depth_map.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )
        self.assertEqual(
            self.frame_data.depth_mask.shape,
            torch.Size([1, self.image_height, self.image_width]),
        )

        self.frame_data.camera = self.frame_data_builder._get_pytorch3d_camera(
            self.frame_annotation,
        )
        self.assertEqual(type(self.frame_data.camera), PerspectiveCameras)

    def test_transpose_normalize_image(self):
        def inverse_transpose_normalize_image(image: np.ndarray) -> np.ndarray:
            # Undo transpose_normalize_image: rescale to [0, 255] and move
            # channels from CHW back to HWC.
            im = image * 255.0
            return im.transpose((1, 2, 0)).astype(np.uint8)
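
        # Round-tripping a grayscale (2D) image should reproduce the input,
        # up to the trailing channel axis added by the conversion.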
        input_image = np.array(
            [[10, 20, 30], [40, 50, 60], [70, 80, 90]], dtype=np.uint8
        )
        expected_input = inverse_transpose_normalize_image(
            transpose_normalize_image(input_image)
        )
        self.assertClose(input_image[..., None], expected_input)
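
        # The same round trip on an H x W x 3 uint8 image should reproduce
        # the input exactly.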
        input_image = np.array(
            [
                [[10, 20, 30], [40, 50, 60], [70, 80, 90]],
                [[100, 110, 120], [130, 140, 150], [160, 170, 180]],
                [[190, 200, 210], [220, 230, 240], [250, 255, 255]],
            ],
            dtype=np.uint8,
        )
        expected_input = inverse_transpose_normalize_image(
            transpose_normalize_image(input_image)
        )
        self.assertClose(input_image, expected_input)

    def test_load_image(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.image.path)
        local_path = self.path_manager.get_local_path(path)
        image = load_image(local_path)
        # Images load as float32 arrays with values in [0, 1].
        self.assertEqual(image.dtype, np.float32)
        self.assertLessEqual(np.max(image), 1.0)
        self.assertGreaterEqual(np.min(image), 0.0)

    def test_load_mask(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.mask.path)
        path = self.path_manager.get_local_path(path)
        mask = load_mask(path)
        self.assertEqual(mask.dtype, np.float32)
        self.assertLessEqual(np.max(mask), 1.0)
        self.assertGreaterEqual(np.min(mask), 0.0)

    def test_load_depth(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
        path = self.path_manager.get_local_path(path)
        depth_map = load_depth(path, self.frame_annotation.depth.scale_adjustment)
        # load_depth returns a float32 map with an explicit channel dimension.
        self.assertEqual(depth_map.dtype, np.float32)
        self.assertEqual(len(depth_map.shape), 3)

    def test_load_16big_png_depth(self):
        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
        path = self.path_manager.get_local_path(path)
        depth_map = load_16big_png_depth(path)
        # The raw 16-bit PNG loader returns a 2D float32 depth map.
        self.assertEqual(depth_map.dtype, np.float32)
        self.assertEqual(len(depth_map.shape), 2)

    def test_load_1bit_png_mask(self):
        mask_path = os.path.join(
            self.dataset_root, self.frame_annotation.depth.mask_path
        )
        mask_path = self.path_manager.get_local_path(mask_path)
        mask = load_1bit_png_mask(mask_path)
        self.assertEqual(mask.dtype, np.float32)
        self.assertEqual(len(mask.shape), 2)

    def test_load_depth_mask(self):
        mask_path = os.path.join(
            self.dataset_root, self.frame_annotation.depth.mask_path
        )
        mask_path = self.path_manager.get_local_path(mask_path)
        mask = load_depth_mask(mask_path)
        self.assertEqual(mask.dtype, np.float32)
        self.assertEqual(len(mask.shape), 3)