pablovela5620 committed on
Commit
a8c8616
1 Parent(s): 899c526

chore: Update dependencies and remove unused files

Browse files
Files changed (11) hide show
  1. .gitignore +4 -3
  2. README.md +1 -1
  3. mini_dpvo/api/inference.py +182 -40
  4. mini_dpvo/dpvo.py +1 -0
  5. mini_dpvo/stream.py +31 -32
  6. packages.txt +0 -1
  7. pixi.lock +0 -0
  8. pixi.toml +37 -11
  9. pixi_app.py +0 -14
  10. tools/app.py +202 -101
  11. tools/demo.py +9 -11
.gitignore CHANGED
@@ -164,6 +164,7 @@ cython_debug/
164
  # pixi environments
165
  .pixi
166
  *.egg-info
167
- thirdparty/*
168
- data/*
169
- checkpoints/*
 
 
164
  # pixi environments
165
  .pixi
166
  *.egg-info
167
+ checkpoints/
168
+ media/
169
+ data/
170
+ build/
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🔥
4
  colorFrom: purple
5
  colorTo: pink
6
  sdk: gradio
7
- sdk_version: 4.31.5
8
  app_file: app.py
9
  pinned: false
10
  ---
 
4
  colorFrom: purple
5
  colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
  ---
mini_dpvo/api/inference.py CHANGED
@@ -16,6 +16,12 @@ from dataclasses import dataclass
16
 
17
  from timeit import default_timer as timer
18
 
 
 
 
 
 
 
19
 
20
  @dataclass
21
  class DPVOPrediction:
@@ -27,14 +33,20 @@ class DPVOPrediction:
27
 
28
  def log_trajectory(
29
  parent_log_path: Path,
30
- poses: Float32[torch.Tensor, "buffer_size 7"],
31
- points: Float32[torch.Tensor, "buffer_size*num_patches 3"],
32
- colors: UInt8[torch.Tensor, "buffer_size num_patches 3"],
33
  intri_np: Float64[np.ndarray, "4"],
34
- bgr_hw3: UInt8[np.ndarray, "h w 3"],
 
 
35
  ):
36
  cam_log_path = f"{parent_log_path}/camera"
37
- rr.log(f"{cam_log_path}/pinhole/image", rr.Image(bgr_hw3[..., ::-1]))
 
 
 
 
38
  rr.log(
39
  f"{cam_log_path}/pinhole",
40
  rr.Pinhole(
@@ -54,18 +66,45 @@ def log_trajectory(
54
  last_index = nonzero_poses.shape[0] - 1
55
  # get last non-zero pose, and the index of the last non-zero pose
56
  quat_pose = nonzero_poses[last_index].numpy(force=True)
57
- trans_quat = quat_pose[:3]
58
  rotation_quat = Rotation.from_quat(quat_pose[3:])
59
 
60
- mat3x3 = rotation_quat.as_matrix()
 
 
 
 
 
 
 
 
 
61
  rr.log(
62
  f"{cam_log_path}",
63
- rr.Transform3D(translation=trans_quat, mat3x3=mat3x3, from_parent=True),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  )
65
 
66
  # outlier removal
67
  trajectory_center = np.median(nonzero_poses[:, :3].numpy(force=True), axis=0)
68
- radii = lambda a: np.linalg.norm(a - trajectory_center, axis=1)
 
 
 
69
  points_np = nonzero_points.view(-1, 3).numpy(force=True)
70
  colors_np = colors.view(-1, 3)[points_mask].numpy(force=True)
71
  inlier_mask = (
@@ -82,6 +121,7 @@ def log_trajectory(
82
  colors=colors_filtered,
83
  ),
84
  )
 
85
 
86
 
87
  def log_final(
@@ -102,7 +142,7 @@ def log_final(
102
 
103
 
104
  def create_reader(
105
- imagedir: str, calib: str, stride: int, skip: int, queue: Queue
106
  ) -> Process:
107
  if os.path.isdir(imagedir):
108
  reader = Process(
@@ -116,56 +156,158 @@ def create_reader(
116
  return reader
117
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  @torch.no_grad()
120
- def run(
121
  cfg: CfgNode,
122
  network_path: str,
123
  imagedir: str,
124
  calib: str,
125
  stride: int = 1,
126
  skip: int = 0,
127
- vis_during: bool = True,
128
  timeit: bool = False,
129
  ) -> tuple[DPVOPrediction, float]:
130
  slam = None
131
  queue = Queue(maxsize=8)
 
132
  reader: Process = create_reader(imagedir, calib, stride, skip, queue)
133
  reader.start()
134
 
135
- if vis_during:
136
- parent_log_path = Path("world")
137
- rr.log(f"{parent_log_path}", rr.ViewCoordinates.RDF, timeless=True)
138
 
139
  start = timer()
140
-
141
- while True:
142
- t: int
143
- bgr_hw3: UInt8[np.ndarray, "h w 3"]
144
- intri_np: Float64[np.ndarray, "4"]
145
- (t, bgr_hw3, intri_np) = queue.get()
146
- # queue will have a (-1, image, intrinsics) tuple when the reader is done
147
- if t < 0:
148
- break
149
-
150
- if vis_during:
151
- rr.set_time_sequence(timeline="timestep", sequence=t)
152
-
153
- bgr_3hw: UInt8[torch.Tensor, "h w 3"] = (
154
- torch.from_numpy(bgr_hw3).permute(2, 0, 1).cuda()
155
  )
156
- intri_torch: Float64[torch.Tensor, "4"] = torch.from_numpy(intri_np).cuda()
 
 
 
 
 
 
 
 
 
 
157
 
158
- if slam is None:
159
- slam = DPVO(cfg, network_path, ht=bgr_3hw.shape[1], wd=bgr_3hw.shape[2])
 
 
 
 
 
 
 
 
160
 
161
- with Timer("SLAM", enabled=timeit):
162
- slam(t, bgr_3hw, intri_torch)
163
 
164
- if slam.is_initialized and vis_during:
165
- poses: Float32[torch.Tensor, "buffer_size 7"] = slam.poses_
166
- points: Float32[torch.Tensor, "buffer_size*num_patches 3"] = slam.points_
167
- colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] = slam.colors_
168
- log_trajectory(parent_log_path, poses, points, colors, intri_np, bgr_hw3)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  for _ in range(12):
171
  slam.update()
 
16
 
17
  from timeit import default_timer as timer
18
 
19
+ import cv2
20
+ import mmcv
21
+ from tqdm import tqdm
22
+ from mini_dust3r.api import OptimizedResult, inferece_dust3r
23
+ from mini_dust3r.model import AsymmetricCroCo3DStereo
24
+
25
 
26
  @dataclass
27
  class DPVOPrediction:
 
33
 
34
  def log_trajectory(
35
  parent_log_path: Path,
36
+ poses: Float32[torch.Tensor, "buffer_size 7"], # noqa: F722
37
+ points: Float32[torch.Tensor, "buffer_size*num_patches 3"], # noqa: F722
38
+ colors: UInt8[torch.Tensor, "buffer_size num_patches 3"], # noqa: F722
39
  intri_np: Float64[np.ndarray, "4"],
40
+ bgr_hw3: UInt8[np.ndarray, "h w 3"], # noqa: F722
41
+ path_list: list,
42
+ jpg_quality: int = 90,
43
  ):
44
  cam_log_path = f"{parent_log_path}/camera"
45
+ rgb_hw3 = mmcv.bgr2rgb(bgr_hw3)
46
+ rr.log(
47
+ f"{cam_log_path}/pinhole/image",
48
+ rr.Image(rgb_hw3).compress(jpeg_quality=jpg_quality),
49
+ )
50
  rr.log(
51
  f"{cam_log_path}/pinhole",
52
  rr.Pinhole(
 
66
  last_index = nonzero_poses.shape[0] - 1
67
  # get last non-zero pose, and the index of the last non-zero pose
68
  quat_pose = nonzero_poses[last_index].numpy(force=True)
69
+ trans_quat: Float32[np.ndarray, "3"] = quat_pose[:3]
70
  rotation_quat = Rotation.from_quat(quat_pose[3:])
71
 
72
+ cam_R_world: Float64[np.ndarray, "3 3"] = rotation_quat.as_matrix()
73
+
74
+ cam_T_world = np.eye(4)
75
+ cam_T_world[:3, :3] = cam_R_world
76
+ cam_T_world[0:3, 3] = trans_quat
77
+
78
+ world_T_cam = np.linalg.inv(cam_T_world)
79
+
80
+ path_list.append(world_T_cam[:3, 3].copy().tolist())
81
+
82
  rr.log(
83
  f"{cam_log_path}",
84
+ rr.Transform3D(
85
+ translation=world_T_cam[:3, 3],
86
+ mat3x3=world_T_cam[:3, :3],
87
+ from_parent=False,
88
+ ),
89
+ )
90
+
91
+ # log path using linestrip
92
+ rr.log(
93
+ f"{parent_log_path}/path",
94
+ rr.LineStrips3D(
95
+ strips=[
96
+ path_list,
97
+ ],
98
+ colors=[255, 0, 0],
99
+ ),
100
  )
101
 
102
  # outlier removal
103
  trajectory_center = np.median(nonzero_poses[:, :3].numpy(force=True), axis=0)
104
+
105
+ def radii(a):
106
+ return np.linalg.norm(a - trajectory_center, axis=1)
107
+
108
  points_np = nonzero_points.view(-1, 3).numpy(force=True)
109
  colors_np = colors.view(-1, 3)[points_mask].numpy(force=True)
110
  inlier_mask = (
 
121
  colors=colors_filtered,
122
  ),
123
  )
124
+ return path_list
125
 
126
 
127
  def log_final(
 
142
 
143
 
144
  def create_reader(
145
+ imagedir: str, calib: str | None, stride: int, skip: int, queue: Queue
146
  ) -> Process:
147
  if os.path.isdir(imagedir):
148
  reader = Process(
 
156
  return reader
157
 
158
 
159
def calculate_num_frames(video_or_image_dir: str, stride: int, skip: int) -> int:
    """Estimate how many frames will be processed for an input sequence.

    Args:
        video_or_image_dir: Path to a directory of frames or to a video file.
        stride: Only one frame out of every ``stride`` frames is processed.
        skip: Number of leading frames that are discarded.

    Returns:
        ``(total - skip) // stride`` clamped at zero, so the value is always
        usable as a progress-bar total even when ``skip`` exceeds the number
        of available frames.
    """
    if os.path.isdir(video_or_image_dir):
        # Every regular file in the directory counts as one frame.
        with os.scandir(video_or_image_dir) as entries:
            total_frames = sum(1 for entry in entries if entry.is_file())
    else:
        cap = cv2.VideoCapture(video_or_image_dir)
        try:
            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Release the capture handle even if querying the count fails.
            cap.release()

    # Without the clamp a large ``skip`` would produce a negative total.
    return max(0, (total_frames - skip) // stride)
177
+
178
+
179
def calib_from_dust3r(
    bgr_hw3: UInt8[np.ndarray, "height width 3"],  # noqa: F722
    model: AsymmetricCroCo3DStereo,
    device: str,
) -> Float64[np.ndarray, "3 3"]:  # noqa: F722
    """Estimate a pinhole intrinsics matrix for one frame using mini-dust3r.

    The frame is written to a private temporary directory, passed to
    ``inferece_dust3r``, and the predicted intrinsics are rescaled from the
    resolution dust3r worked at back to the resolution of ``bgr_hw3``.

    Args:
        bgr_hw3: The input image in BGR format with shape (height, width, 3).
        model: The DUSt3R model used for inference.
        device: The device to run the inference on.

    Returns:
        The 3x3 intrinsics matrix expressed in the original image size.
    """
    import tempfile

    # A per-call temporary directory keeps concurrent calls from overwriting
    # each other's image, guarantees that inference (which is pointed at the
    # directory) only ever sees this one frame, and is removed even when
    # inference raises.
    with tempfile.TemporaryDirectory(prefix="dpvo_") as tmp_dir:
        tmp_path = Path(tmp_dir) / "tmp.png"
        mmcv.imwrite(bgr_hw3, str(tmp_path))
        optimized_results: OptimizedResult = inferece_dust3r(
            image_dir_or_list=tmp_path.parent,
            model=model,
            device=device,
            batch_size=1,
        )

    # The prediction refers to the (possibly resized) image dust3r used.
    downscaled_h, downscaled_w, _ = optimized_results.rgb_hw3_list[0].shape
    orig_h, orig_w, _ = bgr_hw3.shape

    scaling_factor_x = orig_w / downscaled_w
    scaling_factor_y = orig_h / downscaled_h

    # Scale focal lengths and principal point back to the original size.
    K_33_original = optimized_results.K_b33[0].copy()
    K_33_original[0, 0] *= scaling_factor_x  # fx
    K_33_original[1, 1] *= scaling_factor_y  # fy
    K_33_original[0, 2] *= scaling_factor_x  # cx
    K_33_original[1, 2] *= scaling_factor_y  # cy

    return K_33_original
226
+
227
+
228
  @torch.no_grad()
229
+ def inference_dpvo(
230
  cfg: CfgNode,
231
  network_path: str,
232
  imagedir: str,
233
  calib: str,
234
  stride: int = 1,
235
  skip: int = 0,
 
236
  timeit: bool = False,
237
  ) -> tuple[DPVOPrediction, float]:
238
  slam = None
239
  queue = Queue(maxsize=8)
240
+
241
  reader: Process = create_reader(imagedir, calib, stride, skip, queue)
242
  reader.start()
243
 
244
+ parent_log_path = Path("world")
245
+ rr.log(f"{parent_log_path}", rr.ViewCoordinates.RDF, timeless=True)
 
246
 
247
  start = timer()
248
+ total_frames = calculate_num_frames(imagedir, stride, skip)
249
+
250
+ # estimate camera intrinsics if not provided
251
+ if calib is None:
252
+ dust3r_device = (
253
+ "mps"
254
+ if torch.backends.mps.is_available()
255
+ else "cuda"
256
+ if torch.cuda.is_available()
257
+ else "cpu"
 
 
 
 
 
258
  )
259
+ dust3r_model = AsymmetricCroCo3DStereo.from_pretrained(
260
+ "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
261
+ ).to(dust3r_device)
262
+ _, bgr_hw3, _ = queue.get()
263
+ K_33_pred = calib_from_dust3r(bgr_hw3, dust3r_model, dust3r_device)
264
+ intri_np_dust3r = np.array(
265
+ [K_33_pred[0, 0], K_33_pred[1, 1], K_33_pred[0, 2], K_33_pred[1, 2]]
266
+ )
267
+
268
+ # path list for visualizing the trajectory
269
+ path_list = []
270
 
271
+ with tqdm(total=total_frames, desc="Processing Frames") as pbar:
272
+ while True:
273
+ t: int
274
+ bgr_hw3: UInt8[np.ndarray, "h w 3"]
275
+ intri_np: Float64[np.ndarray, "4"]
276
+ (t, bgr_hw3, intri_np_calib) = queue.get()
277
+ intri_np = intri_np_calib if calib is not None else intri_np_dust3r
278
+ # queue will have a (-1, image, intrinsics) tuple when the reader is done
279
+ if t < 0:
280
+ break
281
 
282
+ rr.set_time_sequence(timeline="timestep", sequence=t)
 
283
 
284
+ bgr_3hw: UInt8[torch.Tensor, "h w 3"] = (
285
+ torch.from_numpy(bgr_hw3).permute(2, 0, 1).cuda()
286
+ )
287
+ intri_torch: Float64[torch.Tensor, "4"] = torch.from_numpy(intri_np).cuda()
288
+
289
+ if slam is None:
290
+ slam = DPVO(cfg, network_path, ht=bgr_3hw.shape[1], wd=bgr_3hw.shape[2])
291
+
292
+ with Timer("SLAM", enabled=timeit):
293
+ slam(t, bgr_3hw, intri_torch)
294
+
295
+ if slam.is_initialized:
296
+ poses: Float32[torch.Tensor, "buffer_size 7"] = slam.poses_
297
+ points: Float32[torch.Tensor, "buffer_size*num_patches 3"] = (
298
+ slam.points_
299
+ )
300
+ colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] = slam.colors_
301
+ path_list = log_trajectory(
302
+ parent_log_path=parent_log_path,
303
+ poses=poses,
304
+ points=points,
305
+ colors=colors,
306
+ intri_np=intri_np,
307
+ bgr_hw3=bgr_hw3,
308
+ path_list=path_list,
309
+ )
310
+ pbar.update(1)
311
 
312
  for _ in range(12):
313
  slam.update()
mini_dpvo/dpvo.py CHANGED
@@ -156,6 +156,7 @@ class DPVO:
156
  poses = lietorch.stack(poses, dim=0)
157
  poses = poses.inv().data.cpu().numpy()
158
  tstamps = np.array(self.tlist, dtype=np.float64)
 
159
 
160
  return poses, tstamps
161
 
 
156
  poses = lietorch.stack(poses, dim=0)
157
  poses = poses.inv().data.cpu().numpy()
158
  tstamps = np.array(self.tlist, dtype=np.float64)
159
+ print("Done!")
160
 
161
  return poses, tstamps
162
 
mini_dpvo/stream.py CHANGED
@@ -3,13 +3,10 @@ import numpy as np
3
  from pathlib import Path
4
  from itertools import chain
5
  from multiprocessing import Queue
 
6
 
7
 
8
- def image_stream(
9
- queue: Queue, imagedir: str, calib: str, stride: int, skip: int = 0
10
- ) -> None:
11
- """image generator"""
12
-
13
  calib = np.loadtxt(calib, delimiter=" ")
14
  fx, fy, cx, cy = calib[:4]
15
 
@@ -18,6 +15,17 @@ def image_stream(
18
  K[0, 2] = cx
19
  K[1, 1] = fy
20
  K[1, 2] = cy
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  img_exts = ["*.png", "*.jpeg", "*.jpg"]
23
  image_list = sorted(chain.from_iterable(Path(imagedir).glob(e) for e in img_exts))[
@@ -26,15 +34,11 @@ def image_stream(
26
 
27
  for t, imfile in enumerate(image_list):
28
  image = cv2.imread(str(imfile))
29
- if len(calib) > 4:
30
- image = cv2.undistort(image, K, calib[4:])
31
-
32
- if 0:
33
- image = cv2.resize(image, None, fx=0.5, fy=0.5)
34
- intrinsics = np.array([fx / 2, fy / 2, cx / 2, cy / 2])
35
 
36
- else:
37
  intrinsics = np.array([fx, fy, cx, cy])
 
 
38
 
39
  h, w, _ = image.shape
40
  image = image[: h - h % 16, : w - w % 16]
@@ -45,48 +49,43 @@ def image_stream(
45
 
46
 
47
  def video_stream(
48
- queue: Queue, imagedir: str, calib: str, stride: int, skip: int = 0
49
  ) -> None:
50
  """video generator"""
 
 
 
51
 
52
- calib = np.loadtxt(calib, delimiter=" ")
53
- fx, fy, cx, cy = calib[:4]
54
-
55
- K = np.eye(3)
56
- K[0, 0] = fx
57
- K[0, 2] = cx
58
- K[1, 1] = fy
59
- K[1, 2] = cy
60
-
61
- cap = cv2.VideoCapture(imagedir)
62
 
63
  t = 0
64
 
65
  for _ in range(skip):
66
- ret, image = cap.read()
67
 
68
  while True:
69
  # Capture frame-by-frame
70
  for _ in range(stride):
71
- ret, image = cap.read()
72
- # if frame is read correctly ret is True
73
- if not ret:
74
  break
75
 
76
- if not ret:
77
  break
78
 
79
- if len(calib) > 4:
80
- image = cv2.undistort(image, K, calib[4:])
81
 
82
  image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
83
  h, w, _ = image.shape
84
  image = image[: h - h % 16, : w - w % 16]
85
 
86
- intrinsics = np.array([fx * 0.5, fy * 0.5, cx * 0.5, cy * 0.5])
 
 
 
87
  queue.put((t, image, intrinsics))
88
 
89
  t += 1
90
 
91
  queue.put((-1, image, intrinsics))
92
- cap.release()
 
3
  from pathlib import Path
4
  from itertools import chain
5
  from multiprocessing import Queue
6
+ import mmcv
7
 
8
 
9
+ def load_calib(calib: str) -> np.ndarray:
 
 
 
 
10
  calib = np.loadtxt(calib, delimiter=" ")
11
  fx, fy, cx, cy = calib[:4]
12
 
 
15
  K[0, 2] = cx
16
  K[1, 1] = fy
17
  K[1, 2] = cy
18
+ return K, calib
19
+
20
+
21
+ def image_stream(
22
+ queue: Queue, imagedir: str, calib: str | None, stride: int, skip: int = 0
23
+ ) -> None:
24
+ """image generator"""
25
+
26
+ if calib is not None:
27
+ K, calib = load_calib(calib)
28
+ fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
29
 
30
  img_exts = ["*.png", "*.jpeg", "*.jpg"]
31
  image_list = sorted(chain.from_iterable(Path(imagedir).glob(e) for e in img_exts))[
 
34
 
35
  for t, imfile in enumerate(image_list):
36
  image = cv2.imread(str(imfile))
 
 
 
 
 
 
37
 
38
+ if calib is not None:
39
  intrinsics = np.array([fx, fy, cx, cy])
40
+ else:
41
+ intrinsics = None
42
 
43
  h, w, _ = image.shape
44
  image = image[: h - h % 16, : w - w % 16]
 
49
 
50
 
def video_stream(
    queue: Queue, imagedir: str, calib: str | None, stride: int, skip: int = 0
) -> None:
    """Read a video file and push ``(t, image, intrinsics)`` tuples onto ``queue``.

    Each emitted frame is downscaled by 0.5 and cropped so that height and
    width are multiples of 16. When the video is exhausted a final tuple with
    ``t == -1`` is pushed so the consumer knows to stop.

    Args:
        queue: Queue the frames are written to.
        imagedir: Path to the video file (the name is kept for symmetry with
            the image-directory reader).
        calib: Path to a calibration text file, or ``None`` when intrinsics
            are estimated elsewhere; in that case ``intrinsics`` is ``None``
            in every tuple.
        stride: Emit one frame for every ``stride`` frames read.
        skip: Number of frames discarded before the first emitted frame.
    """
    # Frames are resized by 0.5 below, so the intrinsics are halved to match.
    # They do not change per frame, so they are computed once. Initialising
    # them here also keeps the end-of-stream sentinel valid when the video
    # contains no readable frame.
    intrinsics = None
    if calib is not None:
        K, calib = load_calib(calib)
        intrinsics = np.array([K[0, 0], K[1, 1], K[0, 2], K[1, 2]]) * 0.5

    video_reader = mmcv.VideoReader(imagedir)

    for _ in range(skip):
        video_reader.read()

    # NOTE(review): lens undistortion is not applied in this reader; any
    # distortion coefficients present in the calibration file are ignored.

    t = 0
    image = None
    while True:
        # Advance ``stride`` frames and keep only the last one read.
        for _ in range(stride):
            image = video_reader.read()
            if image is None:
                break

        # ``None`` means the reader ran out of frames.
        if image is None:
            break

        image = cv2.resize(image, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)
        h, w, _ = image.shape
        image = image[: h - h % 16, : w - w % 16]

        queue.put((t, image, intrinsics))

        t += 1

    # End-of-stream sentinel: the consumer stops when it sees t < 0.
    queue.put((-1, image, intrinsics))
 
packages.txt DELETED
@@ -1 +0,0 @@
1
- lsof
 
 
pixi.lock CHANGED
The diff for this file is too large to render. See raw diff
 
pixi.toml CHANGED
@@ -1,6 +1,6 @@
1
  [project]
2
  name = "mini-dpvo"
3
- version = "0.1.1"
4
  description = "Add a short description here"
5
  authors = ["pablovela5620 <[email protected]>"]
6
  channels = ["nvidia/label/cuda-11.8.0", "nvidia", "conda-forge", "pytorch", "pyg"]
@@ -15,16 +15,40 @@ download-model = """
15
  || (
16
  wget https://www.dropbox.com/s/nap0u8zslspdwm4/models.zip
17
  && unzip models.zip -d checkpoints
 
18
  )
19
  """
20
- post-install = {cmd="python -m pip install -e .", depends_on=["download-model"]}
21
- old-app = "python pixi_app.py"
22
- app = {cmd="python tools/app.py", depends_on=["post-install"], outputs=["mini_dpvo.egg-info/PKG-INFO"]}
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  [dependencies]
26
  python = "3.11.*"
27
- pip = ">=23.3.2,<23.4"
28
  cuda = {version = "*", channel="nvidia/label/cuda-11.8.0"}
29
  pytorch-cuda = {version = "11.8.*", channel="pytorch"}
30
  pytorch = {version = ">=2.2.0,<2.3", channel="pytorch"}
@@ -35,14 +59,16 @@ matplotlib = ">=3.8.4,<3.9"
35
  yacs = ">=0.1.8,<0.2"
36
  jaxtyping = ">=0.2.28,<0.3"
37
  icecream = ">=2.1.3,<2.2"
38
- rerun-sdk = "0.15.*"
39
- gradio = "4.31.2.*"
40
  eigen = ">=3.4.0,<3.5"
 
 
 
41
 
42
  [pypi-dependencies]
43
- mini-dust3r = "*"
44
- spaces = "==0.28.3"
45
  opencv-python = ">=4.9.0.80"
46
  evo = ">=1.28.0"
47
- gradio-rerun = "*"
48
- mmcv = "*"
 
 
 
 
1
  [project]
2
  name = "mini-dpvo"
3
+ version = "0.1.0"
4
  description = "Add a short description here"
5
  authors = ["pablovela5620 <[email protected]>"]
6
  channels = ["nvidia/label/cuda-11.8.0", "nvidia", "conda-forge", "pytorch", "pyg"]
 
15
  || (
16
  wget https://www.dropbox.com/s/nap0u8zslspdwm4/models.zip
17
  && unzip models.zip -d checkpoints
18
+ && rm -r models.zip
19
  )
20
  """
21
+ download-dpvo-data = """
22
+ test -e data/movies/IMG_0492.MOV
23
+ || (
24
+ wget https://www.dropbox.com/s/7030y0mdl6efteg/movies.zip -P data/
25
+ && unzip data/movies.zip -d data/
26
+ && rm -r data/movies.zip
27
+ )
28
+ """
29
+
30
+ download-iphone-data = """
31
+ test -e data/iphone/pool.MOV
32
+ || (
33
+ huggingface-cli download pablovela5620/dpvo-example-data pool.MOV --repo-type dataset --local-dir data/iphone/
34
+ )
35
+ """
36
 
37
+ post-install = {cmd="python -m pip install -e .", depends_on=["download-model", "download-dpvo-data", "download-iphone-data"], outputs=["cuda_ba.cpython-311-x86_64-linux-gnu.so"]}
38
+ rr-viewer = "rerun --memory-limit 50% --drop-at-latency 500ms"
39
+
40
+ demo = """
41
+ python tools/demo.py --imagedir data/movies/IMG_0493.MOV --config config/fast.yaml
42
+ """
43
+ app = {cmd="python tools/app.py", depends_on=["post-install"]}
44
+
45
+ # Docker tasks
46
+ docker-build = "docker build --no-cache -t mini-dpvo ."
47
+ docker-run = {cmd="docker run --gpus all -it -p 7860:7860 mini-dpvo", depends_on=["docker-build"]}
48
 
49
  [dependencies]
50
  python = "3.11.*"
51
+ pip = ">=24.0,<25"
52
  cuda = {version = "*", channel="nvidia/label/cuda-11.8.0"}
53
  pytorch-cuda = {version = "11.8.*", channel="pytorch"}
54
  pytorch = {version = ">=2.2.0,<2.3", channel="pytorch"}
 
59
  yacs = ">=0.1.8,<0.2"
60
  jaxtyping = ">=0.2.28,<0.3"
61
  icecream = ">=2.1.3,<2.2"
 
 
62
  eigen = ">=3.4.0,<3.5"
63
+ rerun-sdk = ">=0.16.1"
64
+ tyro = ">=0.8.4,<0.9"
65
+ unzip = ">=6.0,<7"
66
 
67
  [pypi-dependencies]
 
 
68
  opencv-python = ">=4.9.0.80"
69
  evo = ">=1.28.0"
70
+ mini-dust3r = "*"
71
+ gradio-rerun = ">=0.0.3"
72
+ mmcv = "*"
73
+ yt-dlp = "*"
74
+ gradio = ">=4.36.0"
pixi_app.py DELETED
@@ -1,14 +0,0 @@
1
- import gradio as gr
2
- import spaces
3
- import torch
4
-
5
- tensor = torch.zeros(3).cuda()
6
- print(tensor.device)
7
-
8
- @spaces.GPU
9
- def greet(name):
10
- print(tensor.device)
11
- return "Hello pixi pablo " + name + "!!"
12
-
13
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
14
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools/app.py CHANGED
@@ -1,89 +1,140 @@
1
  import gradio as gr
2
 
3
- # import spaces
4
  from gradio_rerun import Rerun
5
  import rerun as rr
6
  import rerun.blueprint as rrb
7
- from pathlib import Path
8
- import uuid
9
  import mmcv
10
- import spaces
 
11
 
12
- from mini_dpvo.api.inference import run
13
  from mini_dpvo.config import cfg as base_cfg
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- base_cfg.merge_from_file("config/fast.yaml")
16
- base_cfg.BUFFER_SIZE = 2048
17
-
18
 
19
- def create_blueprint(image_name_list: list[str], log_path: Path) -> rrb.Blueprint:
20
- # dont show 2d views if there are more than 4 images as to not clutter the view
21
- if len(image_name_list) > 4:
22
- blueprint = rrb.Blueprint(
23
- rrb.Horizontal(
24
- rrb.Spatial3DView(origin=f"{log_path}"),
25
- ),
26
- collapse_panels=True,
27
- )
28
  else:
29
- blueprint = rrb.Blueprint(
30
- rrb.Horizontal(
31
- contents=[
32
- rrb.Spatial3DView(origin=f"{log_path}"),
33
- rrb.Vertical(
34
- contents=[
35
- rrb.Spatial2DView(
36
- origin=f"{log_path}/camera_{i}/pinhole/",
37
- contents=[
38
- "+ $origin/**",
39
- ],
40
- )
41
- for i in range(len(image_name_list))
42
- ]
43
- ),
44
- ],
45
- column_shares=[3, 1],
46
- ),
47
- collapse_panels=True,
48
- )
49
- return blueprint
50
 
 
 
51
 
52
- @spaces.GPU
53
- def predict(video_file_path: str, stride: int) -> tuple[str, str]:
54
- # check if is list or string and if not raise error
55
- if not isinstance(video_file_path, str):
56
- raise gr.Error(
57
- f"Something is wrong with your input video, got: {type(video_file_path)}"
58
- )
 
 
59
 
60
- uuid_str = str(uuid.uuid4())
61
- filename = Path(f"/tmp/gradio/{uuid_str}.rrd")
62
- if not filename.parent.exists():
63
- filename.parent.mkdir(parents=True)
64
- rr.init(f"{uuid_str}")
65
-
66
- calib_path = "data/calib/iphone.txt"
67
- if not Path(calib_path).exists():
68
- gr.Error(f"Calibration file not found at {calib_path}")
69
-
70
- dpvo_pred, time_taken = run(
71
- cfg=base_cfg,
72
- network_path="checkpoints/dpvo.pth",
73
- imagedir=video_file_path,
74
- calib="data/calib/iphone.txt",
75
- stride=stride,
76
- skip=0,
77
- vis_during=True,
78
- )
79
 
80
- # blueprint: rrb.Blueprint = create_blueprint(image_name_list, log_path)
81
- # rr.send_blueprint(blueprint)
82
 
83
- rr.set_time_sequence("sequence", 0)
84
- # log_optimized_result(optimized_results, log_path)
85
- rr.save(filename.as_posix())
86
- return filename.as_posix(), f"Total time: {time_taken:.2f}s"
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
 
89
  def on_file_upload(video_file_path: str) -> None:
@@ -96,26 +147,23 @@ def on_file_upload(video_file_path: str) -> None:
96
  return video_info
97
 
98
 
99
- with gr.Blocks(
100
- css=""".gradio-container {margin: 0 !important; min-width: 100%};""",
101
- title="Mini-DPVO Demo",
102
- ) as demo:
103
- # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
104
- gr.HTML('<h2 style="text-align: center;">Mini-DPVO Demo</h2>')
105
- gr.HTML(
106
- '<p style="text-align: center;">Unofficial DPVO demo using the mini-dpvo pip package</p>'
107
- )
108
- gr.HTML(
109
- '<p style="text-align: center;">Learn more about mini-dpvo here <a href="https://github.com/pablovela5620/mini-dpvo">here</a></p>'
110
- )
111
- with gr.Tab(label="Video Input"):
112
  with gr.Column():
113
  with gr.Row():
114
  video_input = gr.File(
115
- height=300,
116
  file_count="single",
117
- file_types=[".mp4", ".mov"],
118
- label="Video",
119
  )
120
  with gr.Column():
121
  video_info = gr.Markdown(
@@ -123,26 +171,79 @@ with gr.Blocks(
123
  **Video Info:**
124
  """
125
  )
126
- time_taken = gr.Textbox(label="Time Taken")
 
 
127
  with gr.Accordion(label="Advanced", open=False):
128
- stride = gr.Slider(
129
- label="Stride",
130
- minimum=1,
131
- maximum=5,
132
- step=1,
133
- value=2,
134
- )
135
- run_btn_single = gr.Button("Run")
136
- rerun_viewer_single = Rerun(height=900)
137
- run_btn_single.click(
138
- fn=predict,
139
- inputs=[video_input, stride],
140
- outputs=[rerun_viewer_single, time_taken],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  )
142
 
143
  video_input.upload(
144
  fn=on_file_upload, inputs=[video_input], outputs=[video_info]
145
  )
146
 
 
 
 
 
 
 
147
 
148
- demo.launch(share=False)
 
 
1
  import gradio as gr
2
 
 
3
  from gradio_rerun import Rerun
4
  import rerun as rr
5
  import rerun.blueprint as rrb
 
 
6
  import mmcv
7
+ from timeit import default_timer as timer
8
+ from typing import Literal
9
 
 
10
  from mini_dpvo.config import cfg as base_cfg
11
+ from mini_dpvo.api.inference import (
12
+ log_trajectory,
13
+ calib_from_dust3r,
14
+ create_reader,
15
+ calculate_num_frames,
16
+ )
17
+
18
+ import torch
19
+ import numpy as np
20
+ from pathlib import Path
21
+ from multiprocessing import Process, Queue
22
+ from mini_dpvo.dpvo import DPVO
23
+ from jaxtyping import UInt8, Float64, Float32
24
+ from mini_dust3r.model import AsymmetricCroCo3DStereo
25
+ from tqdm import tqdm
26
+ import tyro
27
+ from dataclasses import dataclass
28
+
29
+ if gr.NO_RELOAD:
30
+ NETWORK_PATH = "checkpoints/dpvo.pth"
31
+ DEVICE = (
32
+ "mps"
33
+ if torch.backends.mps.is_available()
34
+ else "cuda"
35
+ if torch.cuda.is_available()
36
+ else "cpu"
37
+ )
38
+ MODEL = AsymmetricCroCo3DStereo.from_pretrained(
39
+ "naver/DUSt3R_ViTLarge_BaseDecoder_512_dpt"
40
+ ).to(DEVICE)
41
+
42
+
43
+ @dataclass
44
+ class GradioDPVOConfig:
45
+ share: bool = False
46
+ port: int = 7860
47
+ server_name: str = "0.0.0.0"
48
+
49
+
50
+ @rr.thread_local_stream("mini_dpvo")
51
+ @torch.no_grad()
52
+ def run_dpvo(
53
+ video_file_path: str,
54
+ jpg_quality: str,
55
+ stride: int = 1,
56
+ skip: int = 0,
57
+ config_type: Literal["accurate", "fast"] = "accurate",
58
+ progress=gr.Progress(),
59
+ ):
60
+ # create a stream to send data back to the rerun viewer
61
+ stream = rr.binary_stream()
62
+ parent_log_path = Path("world")
63
+ rr.log(f"{parent_log_path}", rr.ViewCoordinates.RDF, timeless=True)
64
+
65
+ blueprint = rrb.Blueprint(
66
+ collapse_panels=True,
67
+ )
68
 
69
+ rr.send_blueprint(blueprint)
 
 
70
 
71
+ if config_type == "accurate":
72
+ base_cfg.merge_from_file("config/default.yaml")
73
+ elif config_type == "fast":
74
+ base_cfg.merge_from_file("config/fast.yaml")
 
 
 
 
 
75
  else:
76
+ raise ValueError("Invalid config type")
77
+ base_cfg.BUFFER_SIZE = 2048
78
+
79
+ slam = None
80
+ start_time = timer()
81
+ queue = Queue(maxsize=8)
82
+
83
+ reader: Process = create_reader(video_file_path, None, stride, skip, queue)
84
+ reader.start()
85
+
86
+ # get the first frame
87
+ progress(progress=0.1, desc="Estimating Camera Intrinsics")
88
+ _, bgr_hw3, _ = queue.get()
89
+ K_33_pred = calib_from_dust3r(bgr_hw3, MODEL, DEVICE)
90
+ intri_np: Float64[np.ndarray, "4"] = np.array(
91
+ [K_33_pred[0, 0], K_33_pred[1, 1], K_33_pred[0, 2], K_33_pred[1, 2]]
92
+ )
 
 
 
 
93
 
94
+ num_frames = calculate_num_frames(video_file_path, stride, skip)
95
+ path_list = []
96
 
97
+ with tqdm(total=num_frames, desc="Processing Frames") as pbar:
98
+ while True:
99
+ timestep: int
100
+ bgr_hw3: UInt8[np.ndarray, "h w 3"]
101
+ intri_np: Float64[np.ndarray, "4"]
102
+ (timestep, bgr_hw3, _) = queue.get()
103
+ # queue will have a (-1, image, intrinsics) tuple when the reader is done
104
+ if timestep < 0:
105
+ break
106
 
107
+ rr.set_time_sequence(timeline="timestep", sequence=timestep)
108
+
109
+ bgr_3hw: UInt8[torch.Tensor, "h w 3"] = (
110
+ torch.from_numpy(bgr_hw3).permute(2, 0, 1).cuda()
111
+ )
112
+ intri_torch: Float64[torch.Tensor, "4"] = torch.from_numpy(intri_np).cuda()
113
+
114
+ if slam is None:
115
+ _, h, w = bgr_3hw.shape
116
+ slam = DPVO(base_cfg, NETWORK_PATH, ht=h, wd=w)
 
 
 
 
 
 
 
 
 
117
 
118
+ slam(timestep, bgr_3hw, intri_torch)
119
+ pbar.update(1)
120
 
121
+ if slam.is_initialized:
122
+ poses: Float32[torch.Tensor, "buffer_size 7"] = slam.poses_
123
+ points: Float32[torch.Tensor, "buffer_size*num_patches 3"] = (
124
+ slam.points_
125
+ )
126
+ colors: UInt8[torch.Tensor, "buffer_size num_patches 3"] = slam.colors_
127
+ path_list = log_trajectory(
128
+ parent_log_path,
129
+ poses,
130
+ points,
131
+ colors,
132
+ intri_np,
133
+ bgr_hw3,
134
+ path_list,
135
+ jpg_quality,
136
+ )
137
+ yield stream.read(), timer() - start_time
138
 
139
 
140
  def on_file_upload(video_file_path: str) -> None:
 
147
  return video_info
148
 
149
 
150
+ def main(gradio_config: GradioDPVOConfig):
151
+ with gr.Blocks(
152
+ css=""".gradio-container {margin: 0 !important; min-width: 100%};""",
153
+ title="Mini-DPVO Demo",
154
+ ) as demo:
155
+ # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
156
+ gr.HTML('<h2 style="text-align: center;">Mini-DPVO Demo</h2>')
157
+ gr.HTML(
158
+ '<p style="text-align: center;">Unofficial DPVO demo using the mini-dpvo. Learn more about mini-dpvo <a href="https://github.com/pablovela5620/mini-dpvo">here</a>.</p>'
159
+ )
 
 
 
160
  with gr.Column():
161
  with gr.Row():
162
  video_input = gr.File(
163
+ height=100,
164
  file_count="single",
165
+ file_types=[".mp4", ".mov", ".MOV", ".webm"],
166
+ label="Video File",
167
  )
168
  with gr.Column():
169
  video_info = gr.Markdown(
 
171
  **Video Info:**
172
  """
173
  )
174
+ time_taken = gr.Number(
175
+ label="Time Taken (s)", precision=2, interactive=False
176
+ )
177
  with gr.Accordion(label="Advanced", open=False):
178
+ with gr.Row():
179
+ jpg_quality = gr.Radio(
180
+ label="JPEG Quality %: Lower quality means faster streaming",
181
+ choices=[10, 50, 90],
182
+ value=90,
183
+ type="value",
184
+ )
185
+ stride = gr.Slider(
186
+ label="Stride: How many frames to sample between each prediction",
187
+ minimum=1,
188
+ maximum=5,
189
+ step=1,
190
+ value=5,
191
+ )
192
+ skip = gr.Number(
193
+ label="Skip: How many frames to skip at the beginning",
194
+ value=0,
195
+ precision=0,
196
+ )
197
+ config_type = gr.Dropdown(
198
+ label="Config Type: Choose between accurate and fast",
199
+ value="fast",
200
+ choices=["accurate", "fast"],
201
+ max_choices=1,
202
+ )
203
+ with gr.Row():
204
+ start_btn = gr.Button("Run")
205
+ stop_btn = gr.Button("Stop")
206
+ rr_viewer = Rerun(height=600, streaming=True)
207
+
208
+ # Example videos
209
+ base_example_params = [50, 4, 0, "fast"]
210
+ example_dpvo_dir = Path("data/movies")
211
+ example_iphone_dir = Path("data/iphone")
212
+ example_video_paths = sorted(example_iphone_dir.glob("*.MOV")) + sorted(
213
+ example_dpvo_dir.glob("*.MOV")
214
+ )
215
+ example_video_paths = [str(path) for path in example_video_paths]
216
+
217
+ gr.Examples(
218
+ examples=[[path, *base_example_params] for path in example_video_paths],
219
+ inputs=[video_input, jpg_quality, stride, skip, config_type],
220
+ outputs=[rr_viewer],
221
+ fn=run_dpvo,
222
+ )
223
+
224
+ click_event = start_btn.click(
225
+ fn=run_dpvo,
226
+ inputs=[video_input, jpg_quality, stride, skip, config_type],
227
+ outputs=[rr_viewer, time_taken],
228
+ )
229
+
230
+ stop_btn.click(
231
+ fn=None,
232
+ inputs=[],
233
+ outputs=[],
234
+ cancels=[click_event],
235
  )
236
 
237
  video_input.upload(
238
  fn=on_file_upload, inputs=[video_input], outputs=[video_info]
239
  )
240
 
241
+ demo.launch(
242
+ share=gradio_config.share,
243
+ server_name=gradio_config.server_name,
244
+ server_port=gradio_config.port,
245
+ )
246
+
247
 
248
+ if __name__ == "__main__":
249
+ main(tyro.cli(GradioDPVOConfig))
tools/demo.py CHANGED
@@ -1,6 +1,6 @@
1
  from argparse import ArgumentParser
2
  import rerun as rr
3
- from mini_dpvo.api.inference import run
4
  from mini_dpvo.config import cfg as base_cfg
5
 
6
 
@@ -12,8 +12,7 @@ if __name__ == "__main__":
12
  parser.add_argument("--stride", type=int, default=2)
13
  parser.add_argument("--skip", type=int, default=0)
14
  parser.add_argument("--buffer", type=int, default=2048)
15
- parser.add_argument("--config", default="config/default.yaml")
16
- parser.add_argument("--vis-during", action="store_true")
17
  rr.script_add_args(parser)
18
  args = parser.parse_args()
19
  rr.script_setup(args, "mini_dpvo")
@@ -24,13 +23,12 @@ if __name__ == "__main__":
24
  print("Running with config...")
25
  print(base_cfg)
26
 
27
- run(
28
- base_cfg,
29
- args.network_path,
30
- args.imagedir,
31
- args.calib,
32
- args.stride,
33
- args.skip,
34
- vis_during=args.vis_during,
35
  )
36
  rr.script_teardown(args)
 
1
  from argparse import ArgumentParser
2
  import rerun as rr
3
+ from mini_dpvo.api.inference import inference_dpvo
4
  from mini_dpvo.config import cfg as base_cfg
5
 
6
 
 
12
  parser.add_argument("--stride", type=int, default=2)
13
  parser.add_argument("--skip", type=int, default=0)
14
  parser.add_argument("--buffer", type=int, default=2048)
15
+ parser.add_argument("--config", default="config/fast.yaml")
 
16
  rr.script_add_args(parser)
17
  args = parser.parse_args()
18
  rr.script_setup(args, "mini_dpvo")
 
23
  print("Running with config...")
24
  print(base_cfg)
25
 
26
+ inference_dpvo(
27
+ cfg=base_cfg,
28
+ network_path=args.network_path,
29
+ imagedir=args.imagedir,
30
+ calib=args.calib,
31
+ stride=args.stride,
32
+ skip=args.skip,
 
33
  )
34
  rr.script_teardown(args)