Luigi Piccinelli committed
Commit 39aba6e · 1 Parent(s): 1ea89dd

remove fp16

Files changed (3):
  1. app.py +38 -22
  2. gradio_demo.py +34 -17
  3. unik3d/models/unik3d.py +6 -4
app.py CHANGED
@@ -1,10 +1,15 @@
+"""
+Author: Luigi Piccinelli
+Licensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)
+"""
+
 import gc
 import os
 import shutil
+import sys
 import time
 from datetime import datetime
 from math import pi
-import sys
 
 import gradio as gr
 import numpy as np
@@ -12,12 +17,10 @@ import torch
 import trimesh
 from PIL import Image
 
-
-sys.path.append("unik3d/")
+sys.path.append("./unik3d/")
 
 from unik3d.models import UniK3D
 from unik3d.utils.camera import OPENCV, Fisheye624, Pinhole, Spherical
-from unik3d.utils.visualization import colorize
 
 
 def predictions_to_glb(
@@ -86,7 +89,7 @@ def instantiate_camera(camera_name, params, device):
     return eval(camera_name)(params=torch.tensor(params).float()).to(device)
 
 
-def run_model(target_dir, model_name, camera_name, params):
+def run_model(target_dir, model_name, camera_name, params, efficiency):
 
     print("Instantiating model and camera...")
     model = instantiate_model(model_name)
@@ -102,6 +105,7 @@ def run_model(target_dir, model_name, camera_name, params):
 
     # Perform inference with the model.
     print("Running inference...")
+    model.resolution_level = min(efficiency, 9.0)
     outputs = model.infer(image_tensor, camera=camera, normalize=True)
     outputs["image"] = image_tensor
 
@@ -127,8 +131,8 @@ def gradio_demo(
     hfov,
     mask_black_bg,
     mask_far_points,
+    efficiency
 ):
-    print(target_dir)
     if not os.path.isdir(target_dir) or target_dir == "None":
         return None, "No valid target directory found. Please upload first.", None
 
@@ -138,7 +142,7 @@ def gradio_demo(
     print("Running run_model...")
     params = [fx, fy, cx, cy, k1, k2, k3, k4, k5, k6, t1, t2, hfov]
     with torch.no_grad():
-        outputs = run_model(target_dir, model_name, camera_name, params)
+        outputs = run_model(target_dir, model_name, camera_name, params, efficiency)
 
     # Save predictions
     points = outputs["points"].squeeze().permute(1, 2, 0).cpu().numpy()
@@ -399,8 +403,9 @@ if __name__ == "__main__":
     <li><strong>Upload Your Image:</strong> Use the "Upload Images" panel to provide your input.</li>
     <li><strong>Run:</strong> Click the "Run UniK3D" button to start the 3D estimation process.</li>
     <li><strong>Visualize:</strong> The 3D reconstruction will appear in the viewer on the right. You can rotate, pan, and zoom to explore the model, and download the GLB file.</li>
+    <li><strong>Downstream:</strong> The 3D output can be used as a reconstruction or for monocular camera calibration.</li>
     </ol>
-    <p><strong style="color: #ff7e26;">Please note:</strong> <span style="color: #ff7e26; font-weight: bold;">Our model runs on CPU on HuggingFace Space. Actual inference takes less than 100 ms per image on consumer-level GPUs. Web-based 3D point cloud visualization may be slow due to Gradio's rendering. For faster visualization, use a local machine to run our demo from our <a href="https://github.com/lpiccinelli-eth/UniK3D">GitHub repository</a>.</span></p>
+    <p><strong style="color: #ff7e26;">Please note:</strong> <span style="color: #ff7e26; font-weight: bold;">Our model runs on CPU on HuggingFace Space. Actual inference takes less than 100 ms per image on consumer-level GPUs; on Spaces it takes between 20 s and 90 s, depending on the "Speed-Resolution Tradeoff" chosen. Web-based 3D point cloud visualization may be slow due to Gradio's rendering. For faster visualization, use a local machine to run our demo from our <a href="https://github.com/lpiccinelli-eth/UniK3D">GitHub repository</a>.</span></p>
     </div>
     """
     )
@@ -409,7 +414,7 @@ if __name__ == "__main__":
 
         with gr.Row():
             with gr.Column():
-                camera_dropdown = gr.Dropdown(
+                camera_model = gr.Dropdown(
                     choices=[
                         "Predicted",
                         "Pinhole",
@@ -419,13 +424,14 @@ if __name__ == "__main__":
                     ],
                     label="Input Camera",
                 )
-                model_dropdown = gr.Dropdown(
+                model_size = gr.Dropdown(
                     choices=["Large", "Base", "Small"], label="Utilized Model"
                 )
                 mask_black_bg = gr.Checkbox(
                     label="Filter Black Background", value=False
                 )
                 mask_far_points = gr.Checkbox(label="Filter Far Points", value=False)
+                efficiency = gr.Slider(0, 10, step=1, value=10, label="Speed-Resolution Tradeoff", info="Lower is faster and Higher is more detailed")
 
             with gr.Column():
                 fx = gr.Number(label="Focal length x", value=500.0, visible=False)
@@ -498,6 +504,7 @@ if __name__ == "__main__":
             0.0,
             True,
             False,
+            10.0,
         ],
         [
             "assets/demo/naruto.jpg",
@@ -518,9 +525,10 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
         [
-            "assets/demo/bears.jpg",
+            "assets/demo/bears.png",
             "Large",
             "Predicted",
             0.0,
@@ -538,6 +546,7 @@ if __name__ == "__main__":
             0.0,
             True,
             False,
+            10.0,
         ],
         [
             "assets/demo/berzirk.jpg",
@@ -558,6 +567,7 @@ if __name__ == "__main__":
             0.0,
             True,
             False,
+            10.0,
         ],
         [
             "assets/demo/luke.webp",
@@ -578,6 +588,7 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
         [
             "assets/demo/equirectangular.jpg",
@@ -598,6 +609,7 @@ if __name__ == "__main__":
             360.0,
             False,
             False,
+            10.0,
         ],
         [
             "assets/demo/venice.jpg",
@@ -618,6 +630,7 @@ if __name__ == "__main__":
             360.0,
             False,
             True,
+            10.0,
         ],
         [
             "assets/demo/dl3dv.png",
@@ -638,9 +651,10 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
         [
-            "assets/demo/scannet.jpg",
+            "assets/demo/scannet.png",
             "Large",
             "Fisheye624",
             791.90869140625,
@@ -658,6 +672,7 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
     ]
 
@@ -680,6 +695,7 @@ if __name__ == "__main__":
         hfov,
         mask_black_bg,
         mask_far_points,
+        efficiency
     ):
         target_dir, image_path = handle_uploads(input_image)
         glbfile, log_msg, prediction_save_path = gradio_demo(
@@ -701,6 +717,7 @@ if __name__ == "__main__":
             hfov,
             mask_black_bg,
             mask_far_points,
+            efficiency
         )
         return (
             glbfile,
@@ -716,8 +733,8 @@ if __name__ == "__main__":
         examples=examples,
         inputs=[
             input_image,
-            model_dropdown,
-            camera_dropdown,
+            model_size,
+            camera_model,
             fx,
             fy,
             cx,
@@ -733,6 +750,7 @@ if __name__ == "__main__":
             hfov,
             mask_black_bg,
             mask_far_points,
+            efficiency
         ],
         outputs=[reconstruction_output, log_output, reconstruction_npy],
         fn=example_pipeline,
@@ -746,8 +764,8 @@ if __name__ == "__main__":
         fn=gradio_demo,
         inputs=[
             target_dir_output,
-            model_dropdown,
-            camera_dropdown,
+            model_size,
+            camera_model,
             fx,
             fy,
             cx,
@@ -763,6 +781,7 @@ if __name__ == "__main__":
             hfov,
             mask_black_bg,
             mask_far_points,
+            efficiency
         ],
         outputs=[reconstruction_output, log_output, reconstruction_npy],
     ).then(
@@ -788,13 +807,10 @@ if __name__ == "__main__":
     )
 
     # Dynamically update intrinsic parameter visibility when camera selection changes.
-    camera_dropdown.change(
+    camera_model.change(
         fn=update_parameters,
-        inputs=camera_dropdown,
+        inputs=camera_model,
         outputs=[fx, fy, cx, cy, k1, k2, k3, k4, k5, k6, t1, t2, hfov],
     )
 
-    # demo.queue(max_size=20).launch(show_error=True, share=False, ssr_mode=False)
-    demo.launch(
-        show_error=True,
-    )
+    demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
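For context, the core of this change in app.py is the new "Speed-Resolution Tradeoff" slider, whose value is threaded through `gradio_demo` into `run_model`, where it is clamped into UniK3D's `resolution_level`. Below is a minimal, self-contained sketch of that wiring pattern; the `fake_run` callback and this standalone Blocks app are illustrative stand-ins, not code from the repo:

```python
import gradio as gr


def fake_run(efficiency: float) -> str:
    # In app.py the slider value ends up as: model.resolution_level = min(efficiency, 9.0)
    return f"resolution_level = {min(efficiency, 9.0)}"


with gr.Blocks() as demo:
    efficiency = gr.Slider(
        0, 10, step=1, value=10,
        label="Speed-Resolution Tradeoff",
        info="Lower is faster and Higher is more detailed",
    )
    out = gr.Textbox(label="Effective setting")
    # The slider is just another input; app.py appends it to the existing input lists.
    efficiency.change(fn=fake_run, inputs=efficiency, outputs=out)

if __name__ == "__main__":
    demo.queue(max_size=20).launch(show_error=True)
```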
 
 
 
gradio_demo.py CHANGED
@@ -1,3 +1,8 @@
+"""
+Author: Luigi Piccinelli
+Licensed under the CC-BY NC 4.0 license (http://creativecommons.org/licenses/by-nc/4.0/)
+"""
+
 import gc
 import os
 import shutil
@@ -13,7 +18,6 @@ from PIL import Image
 
 from unik3d.models import UniK3D
 from unik3d.utils.camera import OPENCV, Fisheye624, Pinhole, Spherical
-from unik3d.utils.visualization import colorize
 
 
 def predictions_to_glb(
@@ -82,7 +86,7 @@ def instantiate_camera(camera_name, params, device):
     return eval(camera_name)(params=torch.tensor(params).float()).to(device)
 
 
-def run_model(target_dir, model_name, camera_name, params):
+def run_model(target_dir, model_name, camera_name, params, efficiency):
 
     print("Instantiating model and camera...")
     model = instantiate_model(model_name)
@@ -98,6 +102,7 @@ def run_model(target_dir, model_name, camera_name, params):
 
     # Perform inference with the model.
     print("Running inference...")
+    model.resolution_level = min(efficiency, 9.0)
     outputs = model.infer(image_tensor, camera=camera, normalize=True)
     outputs["image"] = image_tensor
 
@@ -123,8 +128,8 @@ def gradio_demo(
     hfov,
     mask_black_bg,
     mask_far_points,
+    efficiency
 ):
-    print(target_dir)
     if not os.path.isdir(target_dir) or target_dir == "None":
         return None, "No valid target directory found. Please upload first.", None
 
@@ -134,7 +139,7 @@ def gradio_demo(
     print("Running run_model...")
     params = [fx, fy, cx, cy, k1, k2, k3, k4, k5, k6, t1, t2, hfov]
     with torch.no_grad():
-        outputs = run_model(target_dir, model_name, camera_name, params)
+        outputs = run_model(target_dir, model_name, camera_name, params, efficiency)
 
     # Save predictions
     points = outputs["points"].squeeze().permute(1, 2, 0).cpu().numpy()
@@ -395,8 +400,9 @@ if __name__ == "__main__":
     <li><strong>Upload Your Image:</strong> Use the "Upload Images" panel to provide your input.</li>
     <li><strong>Run:</strong> Click the "Run UniK3D" button to start the 3D estimation process.</li>
     <li><strong>Visualize:</strong> The 3D reconstruction will appear in the viewer on the right. You can rotate, pan, and zoom to explore the model, and download the GLB file.</li>
+    <li><strong>Downstream:</strong> The 3D output can be used as a reconstruction or for monocular camera calibration.</li>
     </ol>
-    <p><strong style="color: #ff7e26;">Please note:</strong> <span style="color: #ff7e26; font-weight: bold;">Our model runs on CPU on HuggingFace Space. Actual inference takes less than 100 ms per image on consumer-level GPUs. Web-based 3D point cloud visualization may be slow due to Gradio's rendering. For faster visualization, use a local machine to run our demo from our <a href="https://github.com/lpiccinelli-eth/UniK3D">GitHub repository</a>.</span></p>
+    <p><strong style="color: #ff7e26;">Please note:</strong> <span style="color: #ff7e26; font-weight: bold;">Our model runs on CPU on HuggingFace Space. Actual inference takes less than 100 ms per image on consumer-level GPUs; on Spaces it takes between 20 s and 90 s, depending on the "Speed-Resolution Tradeoff" chosen. Web-based 3D point cloud visualization may be slow due to Gradio's rendering. For faster visualization, use a local machine to run our demo from our <a href="https://github.com/lpiccinelli-eth/UniK3D">GitHub repository</a>.</span></p>
     </div>
     """
     )
@@ -405,7 +411,7 @@ if __name__ == "__main__":
 
         with gr.Row():
             with gr.Column():
-                camera_dropdown = gr.Dropdown(
+                camera_model = gr.Dropdown(
                     choices=[
                         "Predicted",
                         "Pinhole",
@@ -415,13 +421,14 @@ if __name__ == "__main__":
                     ],
                     label="Input Camera",
                 )
-                model_dropdown = gr.Dropdown(
+                model_size = gr.Dropdown(
                     choices=["Large", "Base", "Small"], label="Utilized Model"
                 )
                 mask_black_bg = gr.Checkbox(
                     label="Filter Black Background", value=False
                 )
                 mask_far_points = gr.Checkbox(label="Filter Far Points", value=False)
+                efficiency = gr.Slider(0, 10, step=1, value=10, label="Speed-Resolution Tradeoff", info="Lower is faster and Higher is more detailed")
 
             with gr.Column():
                 fx = gr.Number(label="Focal length x", value=500.0, visible=False)
@@ -494,6 +501,7 @@ if __name__ == "__main__":
             0.0,
             True,
             False,
+            10.0,
         ],
         [
             "assets/demo/naruto.jpg",
@@ -514,6 +522,7 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
         [
             "assets/demo/bears.png",
@@ -534,6 +543,7 @@ if __name__ == "__main__":
             0.0,
             True,
             False,
+            10.0,
         ],
         [
             "assets/demo/berzirk.jpg",
@@ -554,6 +564,7 @@ if __name__ == "__main__":
             0.0,
             True,
             False,
+            10.0,
         ],
         [
             "assets/demo/luke.webp",
@@ -574,6 +585,7 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
         [
             "assets/demo/equirectangular.jpg",
@@ -594,6 +606,7 @@ if __name__ == "__main__":
             360.0,
             False,
             False,
+            10.0,
         ],
         [
             "assets/demo/venice.jpg",
@@ -614,6 +627,7 @@ if __name__ == "__main__":
             360.0,
             False,
             True,
+            10.0,
         ],
         [
             "assets/demo/dl3dv.png",
@@ -634,6 +648,7 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
         [
             "assets/demo/scannet.png",
@@ -654,6 +669,7 @@ if __name__ == "__main__":
             0.0,
             False,
             False,
+            10.0,
         ],
     ]
 
@@ -676,6 +692,7 @@ if __name__ == "__main__":
         hfov,
         mask_black_bg,
         mask_far_points,
+        efficiency
     ):
         target_dir, image_path = handle_uploads(input_image)
         glbfile, log_msg, prediction_save_path = gradio_demo(
@@ -697,6 +714,7 @@ if __name__ == "__main__":
             hfov,
             mask_black_bg,
             mask_far_points,
+            efficiency
         )
         return (
             glbfile,
@@ -712,8 +730,8 @@ if __name__ == "__main__":
         examples=examples,
         inputs=[
             input_image,
-            model_dropdown,
-            camera_dropdown,
+            model_size,
+            camera_model,
             fx,
             fy,
             cx,
@@ -729,6 +747,7 @@ if __name__ == "__main__":
             hfov,
             mask_black_bg,
             mask_far_points,
+            efficiency
         ],
         outputs=[reconstruction_output, log_output, reconstruction_npy],
         fn=example_pipeline,
@@ -742,8 +761,8 @@ if __name__ == "__main__":
         fn=gradio_demo,
         inputs=[
             target_dir_output,
-            model_dropdown,
-            camera_dropdown,
+            model_size,
+            camera_model,
             fx,
             fy,
             cx,
@@ -759,6 +778,7 @@ if __name__ == "__main__":
             hfov,
             mask_black_bg,
             mask_far_points,
+            efficiency
         ],
         outputs=[reconstruction_output, log_output, reconstruction_npy],
     ).then(
@@ -784,13 +804,10 @@ if __name__ == "__main__":
     )
 
     # Dynamically update intrinsic parameter visibility when camera selection changes.
-    camera_dropdown.change(
+    camera_model.change(
         fn=update_parameters,
-        inputs=camera_dropdown,
+        inputs=camera_model,
         outputs=[fx, fy, cx, cy, k1, k2, k3, k4, k5, k6, t1, t2, hfov],
    )
 
-    # demo.queue(max_size=20).launch(show_error=True, share=False, ssr_mode=False)
-    demo.launch(
-        show_error=True,
-    )
+    demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
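gradio_demo.py mirrors the app.py changes above. One detail worth calling out is the launch change at the bottom of both files: the bare `demo.launch(show_error=True)` is replaced by a queued launch with a public share link. A minimal sketch of the before/after, assuming Gradio 5.x (where `launch()` accepts `ssr_mode`):

```python
import gradio as gr

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")  # stand-in for the full demo layout

# Before this commit: no request queue, no share link.
# demo.launch(show_error=True)

# After this commit: at most 20 queued requests, a public share link, SSR off.
demo.queue(max_size=20).launch(show_error=True, share=True, ssr_mode=False)
```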
 
 
 
unik3d/models/unik3d.py CHANGED
@@ -22,6 +22,7 @@ from unik3d.utils.distributed import is_main_process
 from unik3d.utils.misc import get_params, last_stack, match_gt
 
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ENABLED = torch.cuda.is_available()
 
 
 def orthonormal_init(num_tokens, dims):
@@ -146,7 +147,7 @@ class UniK3D(
         )
 
         # compute loss!
-        inputs["radius"] = torch.norm(pts_gt, dim=1, keepdim=True)
+        inputs["distance"] = torch.norm(pts_gt, dim=1, keepdim=True)
         inputs["points"] = pts_gt
         inputs["depth_mask"] = mask
         losses = self.compute_losses(outputs, inputs, image_metas)
@@ -241,8 +242,8 @@ class UniK3D(
         ).reshape(B)
         loss = self.losses["depth"]
         depth_losses = loss(
-            outputs["depth"],
-            target=inputs["depth"],
+            outputs["distance"],
+            target=inputs["distance"],
             mask=inputs["depth_mask"].clone(),
             si=si,
         )
@@ -264,6 +265,7 @@ class UniK3D(
             target_pred=outputs["depth"],
             mask=inputs["depth_mask"].clone(),
         )
+        print(conf_losses, camera_losses, depth_losses)
         losses["opt"][loss.name + "_conf"] = loss.weight * conf_losses.mean()
         losses_to_be_computed.remove("confidence")
 
@@ -274,7 +276,7 @@ class UniK3D(
         return losses
 
     @torch.no_grad()
-    @torch.autocast(device_type=DEVICE, enabled=True, dtype=torch.float16)
+    @torch.autocast(device_type=DEVICE, enabled=ENABLED, dtype=torch.float16)
     def infer(
         self,
         rgb: torch.Tensor,
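Despite the commit title, fp16 is not removed outright: the `autocast` decorator on `UniK3D.infer` is now gated on CUDA availability, so inference runs in fp16 on GPU but falls back to full fp32 on the CPU-only Space. A minimal sketch of the gating pattern (the matmul body is a stand-in for the real forward pass):

```python
import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
ENABLED = torch.cuda.is_available()  # enable fp16 autocast only on GPU

@torch.no_grad()
@torch.autocast(device_type=DEVICE, enabled=ENABLED, dtype=torch.float16)
def infer(x: torch.Tensor) -> torch.Tensor:
    # Stand-in for the model's forward pass; matmul is autocast-eligible.
    return x @ x.mT

x = torch.randn(4, 4, device=DEVICE)
print(infer(x).dtype)  # torch.float16 on CUDA, torch.float32 on CPU
```

The `radius` → `distance` rename also changes what the depth loss supervises: instead of comparing `outputs["depth"]` against `inputs["depth"]`, it now compares per-pixel Euclidean ray distance, computed from the ground-truth point map exactly as in the diff (`distance_gt` and `depth_gt` are names chosen here for illustration):

```python
import torch

pts_gt = torch.randn(1, 3, 4, 4)  # (B, 3, H, W) ground-truth point map
distance_gt = torch.norm(pts_gt, dim=1, keepdim=True)  # (B, 1, H, W), as in the diff
depth_gt = pts_gt[:, 2:3]  # the z-only quantity, which the old loss compared instead
```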