import json import os import h5py import numpy as np import torch from unik3d.datasets.image_dataset import ImageDataset from unik3d.datasets.sequence_dataset import SequenceDataset from unik3d.datasets.utils import DatasetFromList class ETH3D(ImageDataset): min_depth = 0.01 max_depth = 50.0 depth_scale = 1000.0 test_split = "train.txt" train_split = "train.txt" intrisics_file = "intrinsics.json" hdf5_paths = ["ETH3D.hdf5"] def __init__( self, image_shape, split_file, test_mode, benchmark=False, augmentations_db={}, normalize=True, resize_method="hard", mini=1.0, **kwargs, ): super().__init__( image_shape=image_shape, split_file=split_file, test_mode=test_mode, benchmark=benchmark, normalize=normalize, augmentations_db=augmentations_db, resize_method=resize_method, mini=mini, **kwargs, ) self.test_mode = test_mode self.load_dataset() def load_dataset(self): h5file = h5py.File( os.path.join(self.data_root, self.hdf5_paths[0]), "r", libver="latest", swmr=True, ) txt_file = np.array(h5file[self.split_file]) txt_string = txt_file.tostring().decode("ascii")[:-1] # correct the -1 intrinsics = np.array(h5file[self.intrisics_file]).tostring().decode("ascii") intrinsics = json.loads(intrinsics) h5file.close() dataset = [] for line in txt_string.split("\n"): image_filename, depth_filename = line.strip().split(" ") intrinsics_val = torch.tensor(intrinsics[image_filename]).squeeze()[:, :3] sample = [image_filename, depth_filename, intrinsics_val] dataset.append(sample) self.dataset = DatasetFromList(dataset) self.log_load_dataset() def pre_pipeline(self, results): results = super().pre_pipeline(results) results["dense"] = [True] * self.num_frames * self.num_copies results["quality"] = [1] * self.num_frames * self.num_copies return results class ETH3D_F(SequenceDataset): min_depth = 0.05 max_depth = 60.0 depth_scale = 1000.0 test_split = "train.txt" train_split = "train.txt" sequences_file = "sequences.json" hdf5_paths = ["ETH3D-F.hdf5"] def __init__( self, image_shape: tuple[int, int], split_file: str, test_mode: bool, normalize: bool, augmentations_db: dict[str, float], resize_method: str, mini: float = 1.0, num_frames: int = 1, benchmark: bool = False, decode_fields: list[str] = ["image", "depth"], inplace_fields: list[str] = ["camera_params", "cam2w"], **kwargs, ) -> None: super().__init__( image_shape=image_shape, split_file=split_file, test_mode=test_mode, benchmark=benchmark, normalize=normalize, augmentations_db=augmentations_db, resize_method=resize_method, mini=mini, num_frames=num_frames, decode_fields=( decode_fields if not test_mode else [*decode_fields, "points"] ), inplace_fields=inplace_fields, **kwargs, ) def pre_pipeline(self, results): results = super().pre_pipeline(results) results["dense"] = [True] * self.num_frames * self.num_copies results["quality"] = [1] * self.num_frames * self.num_copies return results class ETH3DRMVD(SequenceDataset): min_depth = 0.01 max_depth = 50.0 depth_scale = 1000.0 default_fps = 6 test_split = "test.txt" train_split = "test.txt" sequences_file = "sequences.json" hdf5_paths = ["eth3d_rmvd.hdf5"] def __init__( self, image_shape, split_file, test_mode, crop=None, augmentations_db={}, normalize=True, resize_method="hard", mini: float = 1.0, num_frames: int = 1, benchmark: bool = False, decode_fields: list[str] = ["image", "depth"], inplace_fields: list[str] = ["K", "cam2w"], **kwargs, ): super().__init__( image_shape=image_shape, split_file=split_file, test_mode=test_mode, benchmark=benchmark, normalize=normalize, augmentations_db=augmentations_db, resize_method=resize_method, mini=mini, num_frames=num_frames, decode_fields=decode_fields, inplace_fields=inplace_fields, **kwargs, )