ZayarnyukNick's picture
Upload folder using huggingface_hub
864ebc9 verified
#!/usr/bin/env python3
# Copyright (C) 2024-present Naver Corporation. All rights reserved.
# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
#
# --------------------------------------------------------
# Preprocessing code for the MegaDepth dataset
# (dataset available at https://www.cs.cornell.edu/projects/megadepth/)
# --------------------------------------------------------
import collections
import os
import os.path as osp
import numpy as np
from tqdm import tqdm
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
import cv2
import h5py
import path_to_root # noqa
from dust3r.datasets.utils import cropping # noqa
from dust3r.utils.parallel import parallel_threads
def get_parser():
    """Build the command-line parser for this preprocessing script.

    Returns:
        An ``argparse.ArgumentParser`` with two mandatory options
        (``--megadepth_dir``, ``--precomputed_pairs``) and an optional
        ``--output_dir`` defaulting to ``data/megadepth_processed``.
    """
    from argparse import ArgumentParser

    cli = ArgumentParser()
    # Both inputs are mandatory: the raw dataset root and the pairs archive.
    for required_flag in ("--megadepth_dir", "--precomputed_pairs"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--output_dir", default="data/megadepth_processed")
    return cli
def main(db_root, pairs_path, output_dir):
    """Preprocess every MegaDepth image referenced by the precomputed pairs.

    Args:
        db_root: Root directory of the raw MegaDepth dataset.
        pairs_path: Path to the archive loaded with ``np.load``; it must expose
            the keys ``scenes``, ``images`` and ``pairs``.
        output_dir: Destination root; one ``<scene>/<subscene>`` folder is
            created per processed scene.
    """
    os.makedirs(output_dir, exist_ok=True)

    # NOTE(review): allow_pickle=True unpickles arbitrary objects, so the pairs
    # file must come from a trusted source.
    archive = np.load(pairs_path, allow_pickle=True)
    scenes = archive["scenes"]
    images = archive["images"]
    pairs = archive["pairs"]

    # Collect, per scene index, the set of image indices used by any pair.
    images_per_scene = collections.defaultdict(set)
    for scene_idx, first_im, second_im, _score in pairs:
        bucket = images_per_scene[scene_idx]
        bucket.add(first_im)
        bucket.add(second_im)

    # One pass per scene: read its cameras once, then process images in parallel.
    for scene_idx, im_idxs in tqdm(images_per_scene.items(), desc="Overall"):
        # Each scene entry is split on whitespace into exactly two parts.
        scene, subscene = scenes[scene_idx].split()

        out_dir = osp.join(output_dir, scene, subscene)
        os.makedirs(out_dir, exist_ok=True)

        _, pose_w2cam, intrinsics = _load_kpts_and_poses(
            db_root, scene, subscene, intrinsics=True
        )

        in_dir = osp.join(db_root, scene, "dense" + subscene)
        image_names = [images[im_id] for im_id in im_idxs]
        jobs = [
            (in_dir, name, intrinsics[name], pose_w2cam[name], out_dir)
            for name in image_names
        ]
        parallel_threads(
            resize_one_image,
            jobs,
            star_args=True,
            front_num=0,
            leave=False,
            desc=f"{scene}/{subscene}",
        )

    print("Done! prepared all pairs in", output_dir)
def resize_one_image(root, tag, K_pre_rectif, pose_w2cam, out_dir):
    """Rescale one image and its depth map, then save them with the camera data.

    Three files are written to ``out_dir``: ``<tag>.jpg`` (image),
    ``<tag>.exr`` (depth map) and ``<tag>.npz`` (``intrinsics`` and
    ``cam2world``). The ``.npz`` is written last and doubles as the "already
    processed" marker checked on entry.

    Args:
        root: Directory containing the ``imgs`` and ``depths`` sub-folders.
        tag: Image file name inside ``imgs``; the depth file is the same name
            with its extension replaced by ``.h5``.
        K_pre_rectif: ``(image_size, K, distortion)`` tuple describing the
            camera before undistortion.
        pose_w2cam: World-to-camera matrix; its inverse is stored as
            ``cam2world``.
        out_dir: Output directory (must already exist).

    Raises:
        OSError: If the source image cannot be read by OpenCV.
    """
    if osp.isfile(osp.join(out_dir, tag + ".npz")):
        return

    # load image -- cv2.imread signals failure by returning None, so check it
    # here instead of letting cvtColor fail with an unrelated-looking error.
    img_path = osp.join(root, "imgs", tag)
    img_bgr = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img_bgr is None:
        raise OSError(f"could not read image: {img_path}")
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # load depth
    with h5py.File(osp.join(root, "depths", osp.splitext(tag)[0] + ".h5"), "r") as hd5:
        depthmap = np.asarray(hd5["depth"])

    # rectify = undistort the intrinsics
    imsize_pre, K_pre, distortion = K_pre_rectif
    imsize_post = img.shape[1::-1]  # (width, height) of the loaded image
    K_post = cv2.getOptimalNewCameraMatrix(
        K_pre,
        distortion,
        imsize_pre,
        alpha=0,
        newImgSize=imsize_post,
        centerPrincipalPoint=True,
    )[0]

    # downscale
    img_out, depthmap_out, intrinsics_out, R_in2out = _downscale_image(
        K_post, img, depthmap, resolution_out=(800, 600)
    )

    # write everything (the .npz goes last: it marks the image as done)
    img_out.save(osp.join(out_dir, tag + ".jpg"), quality=90)
    cv2.imwrite(osp.join(out_dir, tag + ".exr"), depthmap_out)

    camout2world = np.linalg.inv(pose_w2cam)
    camout2world[:3, :3] = camout2world[:3, :3] @ R_in2out.T
    np.savez(
        osp.join(out_dir, tag + ".npz"),
        intrinsics=intrinsics_out,
        cam2world=camout2world,
    )
def _downscale_image(camera_intrinsics, image, depthmap, resolution_out=(512, 384)):
    """Rescale an image/depth pair to an orientation-aware target resolution.

    The two values of ``resolution_out`` are reordered to follow the input's
    orientation: smallest first when the image is taller than wide, largest
    first otherwise (square images included).

    Returns:
        ``(image, depthmap, intrinsics_out, R_in2out)`` where the first three
        come from ``cropping.rescale_image_depthmap`` and ``R_in2out`` is a
        3x3 identity matrix.
    """
    height, width = image.shape[:2]

    target_size = sorted(resolution_out)
    is_portrait = width < height
    if not is_portrait:
        target_size.reverse()

    rescaled = cropping.rescale_image_depthmap(
        image, depthmap, camera_intrinsics, target_size, force=False
    )
    image, depthmap, intrinsics_out = rescaled

    # No rotation is applied by this step.
    return image, depthmap, intrinsics_out, np.eye(3)
def _read_colmap_cameras(cameras_path):
    """Parse a ``cameras.txt`` file into ``{camera_id: (image_size, K, distortion)}``."""
    with open(cameras_path, "r") as f:
        raw = f.readlines()[3:]  # skip the header

    camera_intrinsics = {}
    for line in raw:
        if not line.strip():
            continue  # tolerate blank / trailing lines
        fields = line.split(" ")
        # fields[0] is the camera id, fields[1] is skipped, then six numbers
        width, height, focal, cx, cy, k0 = [float(elem) for elem in fields[2:]]
        K = np.eye(3)
        K[0, 0] = focal
        K[1, 1] = focal
        K[0, 2] = cx
        K[1, 2] = cy
        camera_intrinsics[int(fields[0])] = (
            (int(width), int(height)),
            K,
            (k0, 0, 0, 0),
        )
    return camera_intrinsics


def _load_kpts_and_poses(root, scene_id, subscene, z_only=False, intrinsics=False):
    """Load per-image poses, 3D point ids and (optionally) intrinsics.

    Reads ``<root>/<scene_id>/sparse/manhattan/<subscene>/images.txt`` and,
    when ``intrinsics`` is true, ``cameras.txt`` from the same folder.

    Args:
        root: Dataset root directory.
        scene_id: Scene folder name.
        subscene: Sub-folder name under ``sparse/manhattan``.
        z_only: If true, each pose is the vector returned by
            ``colmap_raw_pose_to_principal_axis``; otherwise the 4x4 matrix
            returned by ``colmap_raw_pose_to_RT``.
        intrinsics: If true, also return the per-image intrinsics.

    Returns:
        ``(points3D_idxs, poses)`` or, with ``intrinsics=True``,
        ``(points3D_idxs, poses, image_intrinsics)``. All dictionaries are
        keyed by the last field of each image line (the image name).

    Raises:
        KeyError: If an image refers to a camera id missing from ``cameras.txt``.
    """
    sparse_dir = os.path.join(root, scene_id, "sparse", "manhattan", subscene)

    if intrinsics:
        camera_intrinsics = _read_colmap_cameras(
            os.path.join(sparse_dir, "cameras.txt")
        )

    with open(os.path.join(sparse_dir, "images.txt"), "r") as f:
        raw = f.read().splitlines()[4:]  # skip the header

    extract_pose = (
        colmap_raw_pose_to_principal_axis if z_only else colmap_raw_pose_to_RT
    )

    poses = {}
    points3D_idxs = {}
    # Keyed by image name so that pose and camera can never get misaligned,
    # even if an image name shows up more than once in the file.
    camera_of_image = {}
    # Entries come as line pairs: an image line followed by its points line.
    for image, points in zip(raw[::2], raw[1::2]):
        image = image.split(" ")
        points = points.split(" ")

        image_id = image[-1]
        camera_of_image[image_id] = int(image[-2])

        # everything between the first field and the camera id is the raw pose
        raw_pose = [float(elem) for elem in image[1:-2]]
        poses[image_id] = extract_pose(raw_pose)

        # Every third token (starting at index 2) is a 3D point id; -1 marks
        # "no 3D point". Filtering on the parsed value guarantees -1 is absent.
        points3D_idxs[image_id] = {
            idx for idx in map(int, points[2::3]) if idx != -1
        }

    if intrinsics:
        image_intrinsics = {
            im_id: camera_intrinsics[cam] for im_id, cam in camera_of_image.items()
        }
        return points3D_idxs, poses, image_intrinsics
    return points3D_idxs, poses
def colmap_raw_pose_to_principal_axis(image_pose):
    """Return the third row of the rotation matrix encoded by a raw pose.

    Args:
        image_pose: Sequence whose first four values are a quaternion in
            ``(w, x, y, z)`` order; it is normalized before use.

    Returns:
        A float32 array of 3 values.
    """
    quat = image_pose[:4]
    w, x, y, z = quat / np.linalg.norm(quat)

    axis_x = 2 * x * z - 2 * y * w
    axis_y = 2 * y * z + 2 * x * w
    axis_z = 1 - 2 * x * x - 2 * y * y
    return np.array([axis_x, axis_y, axis_z], dtype=np.float32)
def colmap_raw_pose_to_RT(image_pose):
    """Convert a raw ``(quaternion, translation)`` pose to a 4x4 matrix.

    Args:
        image_pose: Sequence holding a ``(w, x, y, z)`` quaternion in its first
            four values and the translation in values 4 to 6. The quaternion
            is normalized before use.

    Returns:
        A 4x4 world-to-camera matrix: rotation in the upper-left 3x3 block,
        translation in the last column, ``[0, 0, 0, 1]`` as the last row.
    """
    quat = image_pose[:4]
    w, x, y, z = quat / np.linalg.norm(quat)

    # Start from identity so the homogeneous last row is already in place.
    world_to_cam = np.eye(4)
    world_to_cam[0, :3] = [
        1 - 2 * y * y - 2 * z * z,
        2 * x * y - 2 * z * w,
        2 * x * z + 2 * y * w,
    ]
    world_to_cam[1, :3] = [
        2 * x * y + 2 * z * w,
        1 - 2 * x * x - 2 * z * z,
        2 * y * z - 2 * x * w,
    ]
    world_to_cam[2, :3] = [
        2 * x * z - 2 * y * w,
        2 * y * z + 2 * x * w,
        1 - 2 * x * x - 2 * y * y,
    ]
    world_to_cam[:3, 3] = image_pose[4:7]
    return world_to_cam
if __name__ == "__main__":
    # Script entry point: forward the parsed command-line options to main().
    cli_args = get_parser().parse_args()
    main(
        cli_args.megadepth_dir,
        cli_args.precomputed_pairs,
        cli_args.output_dir,
    )