File size: 7,740 Bytes
4f6b78d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 |
import numpy as np
import cv2
import os
from tqdm import tqdm
import argparse
import torch
# Sanity-check tag written at the start of Sintel .flo/.dpt/.cam files
# (the Middlebury "PIEH" float); a mismatch usually means wrong endianness.
TAG_FLOAT = 202021.25
def flow_read(filename):
	""" Read optical flow from file, return (U,V) tuple.

	The file layout is the Middlebury .flo format: a float32 tag, int32
	width, int32 height, then row-major interleaved (u, v) float32 pairs.

	Original code by Deqing Sun, adapted from Daniel Scharstein.
	"""
	# Use a context manager so the handle is closed even if an assert fires
	# (the original left the file open).
	with open(filename, 'rb') as f:
		check = np.fromfile(f, dtype=np.float32, count=1)[0]
		assert check == TAG_FLOAT, 'flow_read:: Wrong tag in flow file (should be: {0}, is: {1}). Big-endian machine?'.format(TAG_FLOAT, check)
		width = np.fromfile(f, dtype=np.int32, count=1)[0]
		height = np.fromfile(f, dtype=np.int32, count=1)[0]
		size = width * height
		assert width > 0 and height > 0 and size > 1 and size < 100000000, 'flow_read:: Invalid input size (width = {0}, height = {1}).'.format(width, height)
		tmp = np.fromfile(f, dtype=np.float32, count=-1).reshape((height, width * 2))
	# Columns alternate u, v — even columns are horizontal flow, odd are vertical.
	u = tmp[:, ::2]
	v = tmp[:, 1::2]
	return u, v
def cam_read(filename):
	""" Read camera data, return (M,N) tuple.

	M is the intrinsic matrix, N is the extrinsic matrix, so that
	x = M*N*X,
	where x is a point in homogeneous image pixel coordinates, and X is a
	point in homogeneous world coordinates.

	Layout on disk: float32 tag, then 9 float64 (3x3 intrinsics), then
	12 float64 (3x4 extrinsics).
	"""
	# Context manager closes the file even on an assertion failure
	# (the original leaked the handle).
	with open(filename, 'rb') as f:
		check = np.fromfile(f, dtype=np.float32, count=1)[0]
		assert check == TAG_FLOAT, 'cam_read:: Wrong tag in cam file (should be: {0}, is: {1}). Big-endian machine?'.format(TAG_FLOAT, check)
		M = np.fromfile(f, dtype='float64', count=9).reshape((3, 3))
		N = np.fromfile(f, dtype='float64', count=12).reshape((3, 4))
	return M, N
def depth_read(filename):
	""" Read depth data from file, return as numpy array.

	Layout on disk: float32 tag, int32 width, int32 height, then
	height*width float32 depth values in row-major order.
	"""
	# Context manager closes the file even on an assertion failure
	# (the original leaked the handle).
	with open(filename, 'rb') as f:
		check = np.fromfile(f, dtype=np.float32, count=1)[0]
		assert check == TAG_FLOAT, 'depth_read:: Wrong tag in depth file (should be: {0}, is: {1}). Big-endian machine?'.format(TAG_FLOAT, check)
		width = np.fromfile(f, dtype=np.int32, count=1)[0]
		height = np.fromfile(f, dtype=np.int32, count=1)[0]
		size = width * height
		assert width > 0 and height > 0 and size > 1 and size < 100000000, 'depth_read:: Invalid input size (width = {0}, height = {1}).'.format(width, height)
		depth = np.fromfile(f, dtype=np.float32, count=-1).reshape((height, width))
	return depth
def RT_to_extrinsic_matrix(R, T):
	"""Assemble a 4x4 pose from rotation R (3x3) and translation T (3x1), then invert it."""
	bottom_row = np.array([[0, 0, 0, 1]])
	pose = np.vstack([np.hstack([R, T]), bottom_row])
	return np.linalg.inv(pose)
def depth_to_3d(depth_map, intrinsic_matrix):
	"""Back-project an (H, W) depth map to an (H, W, 3) grid of camera-space points.

	Uses the standard pinhole model: X = (u - cx) * Z / fx, Y = (v - cy) * Z / fy.
	"""
	rows, cols = depth_map.shape
	fx, fy = intrinsic_matrix[0, 0], intrinsic_matrix[1, 1]
	cx, cy = intrinsic_matrix[0, 2], intrinsic_matrix[1, 2]
	# Pixel coordinate grids: u varies along columns, v along rows.
	u_grid, v_grid = np.meshgrid(np.arange(cols), np.arange(rows))
	x = (u_grid - cx) * depth_map / fx
	y = (v_grid - cy) * depth_map / fy
	return np.stack([x, y, depth_map], axis=-1)
def project_3d_to_2d(points_3d, intrinsic_matrix):
	"""Project (N, 3) camera-space points to (N, 2) pixel coordinates.

	Applies the intrinsic matrix, then the perspective divide by depth.
	"""
	# Row-vector form: points @ K.T is the transpose of K @ points.T.
	homogeneous = points_3d @ intrinsic_matrix.T
	return homogeneous[:, :2] / homogeneous[:, 2:3]
def compute_optical_flow(depth1, depth2, pose1, pose2, intrinsic_matrix1, intrinsic_matrix2):
	"""Compute the rigid (camera-motion-only) optical flow from frame 1 to frame 2.

	Back-projects frame 1's depth into 3D, transforms the points by the
	relative pose (pose2 @ inv(pose1)), reprojects into frame 2, and returns
	the per-pixel displacement as an (H*W, 2) array.

	All inputs may be numpy arrays or torch tensors; tensors are moved to
	CPU and converted. ``depth2`` is accepted for interface symmetry but is
	not used by the computation.
	"""
	def _to_numpy(x):
		# Accept torch tensors transparently; leave numpy arrays untouched.
		return x.cpu().numpy() if isinstance(x, torch.Tensor) else x

	depth1, depth2, pose1, pose2, intrinsic_matrix1, intrinsic_matrix2 = (
		_to_numpy(a) for a in (depth1, depth2, pose1, pose2, intrinsic_matrix1, intrinsic_matrix2)
	)

	points_3d_frame1 = depth_to_3d(depth1, intrinsic_matrix1).reshape(-1, 3)
	points_3d_frame1_hom = np.concatenate(
		[points_3d_frame1, np.ones((points_3d_frame1.shape[0], 1))], axis=1
	).T
	# Relative transform taking frame-1 camera coordinates to frame-2.
	transformation_matrix = (pose2) @ np.linalg.inv(pose1)
	points_3d_frame2_hom = transformation_matrix @ points_3d_frame1_hom
	points_3d_frame2 = (points_3d_frame2_hom[:3, :]).T

	points_2d_frame1 = project_3d_to_2d(points_3d_frame1, intrinsic_matrix1)
	points_2d_frame2 = project_3d_to_2d(points_3d_frame2, intrinsic_matrix2)
	# Flow is the displacement of each projected point between the frames.
	optical_flow = points_2d_frame2 - points_2d_frame1
	return optical_flow
def get_dynamic_label(base_dir, seq, continuous=False, threshold=13.75, save_dir='dynamic_label'):
	"""Write per-frame dynamic-object label images for one Sintel sequence.

	For each consecutive frame pair, computes the rigid flow implied by depth
	and camera motion, compares it against the ground-truth flow, and saves
	the per-pixel error as either a binary mask (``error > threshold``) or a
	normalized grayscale image (``continuous=True``) under
	``base_dir/save_dir/seq``.
	"""
	depth_dir = os.path.join(base_dir, 'depth', seq)
	cam_dir = os.path.join(base_dir, 'camdata_left', seq)
	flow_dir = os.path.join(base_dir, 'flow', seq)
	dynamic_label_dir = os.path.join(base_dir, save_dir, seq)
	os.makedirs(dynamic_label_dir, exist_ok=True)
	frames = sorted([f for f in os.listdir(depth_dir) if f.endswith('.dpt')])
	for i, frame1 in enumerate(frames):
		# The last frame has no successor, so no flow can be computed for it.
		if i == len(frames) - 1:
			continue
		frame2 = frames[i + 1]
		frame1_id = frame1.split('.')[0]
		frame2_id = frame2.split('.')[0]
		# Load depth maps
		depth_map_frame1 = depth_read(os.path.join(depth_dir, frame1))
		depth_map_frame2 = depth_read(os.path.join(depth_dir, frame2))
		# Load camera intrinsics and poses
		intrinsic_matrix1, pose_frame1 = cam_read(os.path.join(cam_dir, f'{frame1_id}.cam'))
		intrinsic_matrix2, pose_frame2 = cam_read(os.path.join(cam_dir, f'{frame2_id}.cam'))
		# Pad the 3x4 extrinsics to full 4x4 homogeneous transforms.
		pose_frame1 = np.concatenate([pose_frame1, np.array([[0, 0, 0, 1]])], axis=0)
		pose_frame2 = np.concatenate([pose_frame2, np.array([[0, 0, 0, 1]])], axis=0)
		# Rigid flow predicted from depth + camera motion alone.
		optical_flow = compute_optical_flow(depth_map_frame1, depth_map_frame2, pose_frame1, pose_frame2, intrinsic_matrix1, intrinsic_matrix2)
		# Reshape the optical flow to the image dimensions
		height, width = depth_map_frame1.shape
		optical_flow_image = optical_flow.reshape(height, width, 2)
		# Load ground truth optical flow
		u, v = flow_read(os.path.join(flow_dir, f'{frame1_id}.flo'))
		gt_flow = np.stack([u, v], axis=-1)
		# Per-pixel Euclidean error between GT flow and rigid flow; large
		# errors indicate independently moving (dynamic) pixels.
		error_map = np.linalg.norm(gt_flow - optical_flow_image, axis=-1)
		if not continuous:
			binary_error_map = error_map > threshold
			# Save the binary error map
			cv2.imwrite(os.path.join(dynamic_label_dir, f'{frame1_id}.png'), binary_error_map.astype(np.uint8) * 255)
		else:
			# Normalize to [0, 1]; guard against an all-zero error map to
			# avoid a division-by-zero producing a NaN image.
			max_error = error_map.max()
			if max_error > 0:
				error_map = error_map / max_error
			cv2.imwrite(os.path.join(dynamic_label_dir, f'{frame1_id}.png'), (error_map * 255).astype(np.uint8))
if __name__ == '__main__':
	# Parse arguments first so `--help` works without the dataset present
	# (the original listed the data directory before argparse ran).
	parser = argparse.ArgumentParser(
		description='Generate dynamic-object labels for Sintel sequences.')
	parser.add_argument('--continuous', action='store_true',
		help='save a normalized grayscale error map instead of a binary mask')
	parser.add_argument('--threshold', type=float, default=13.75,
		help='flow-error threshold (pixels) for the binary mask')
	parser.add_argument('--save_dir', type=str, default='dynamic_label',
		help='output subdirectory under the base directory')
	parser.add_argument('--base_dir', type=str, default='data/sintel/training',
		help='Sintel training root containing depth/, camdata_left/, flow/')
	args = parser.parse_args()
	base_dir = args.base_dir
	# Process all sequences found under the depth directory.
	sequences = sorted(os.listdir(os.path.join(base_dir, 'depth')))
	for seq in tqdm(sequences):
		get_dynamic_label(base_dir, seq, continuous=args.continuous, threshold=args.threshold, save_dir=args.save_dir)
		print(f'Finished processing {seq}')
|