Spaces:

lpiccinelli
/

UniK3D-demo

Running on Zero

Luigi Piccinelli committed on Mar 20

Commit

183b4b6

1 Parent(s): 39aba6e

remove fp16

Files changed (2) hide show

app.py CHANGED Viewed

@@ -405,7 +405,7 @@ if __name__ == "__main__":
             <li><strong>Visualize:</strong> The 3D reconstruction will appear in the viewer on the right. You can rotate, pan, and zoom to explore the model, and download the GLB file.</li>
             <li><strong>Downstream:</strong> The 3D output can be used as reconstruction or for monocular camera calibration.</li>
         </ol>
-        <p><strong style="color: #ff7e26;">Please note:</strong> <span style="color: #ff7e26; font-weight: bold;">Our model runs on CPU on HuggingFace Space. Actual inference is less than 100ms second per image on consumer-level GPUs, on Spaces will take between 20s and 90s, depending on the "Speed-Resoltion Tradeoff" chosen. Web-based 3D pointcloud visualization may be slow due to Gradio's rendering. For faster visualization, use a local machine to run our demo from our <a href="https://github.com/lpiccinelli-eth/UniK3D">GitHub repository</a>. </span></p>
         </div>
         """
         )
@@ -528,7 +528,7 @@ if __name__ == "__main__":
                 10.0,
             ],
             [
-                "assets/demo/bears.png",
                 "Large",
                 "Predicted",
                 0.0,
@@ -654,7 +654,7 @@ if __name__ == "__main__":
                 10.0,
             ],
             [
-                "assets/demo/scannet.png",
                 "Large",
                 "Fisheye624",
                 791.90869140625,

             <li><strong>Visualize:</strong> The 3D reconstruction will appear in the viewer on the right. You can rotate, pan, and zoom to explore the model, and download the GLB file.</li>
             <li><strong>Downstream:</strong> The 3D output can be used as reconstruction or for monocular camera calibration.</li>
         </ol>
+        <p><strong style="color: #ff7e26;">Please note:</strong> <span style="color: #ff7e26; font-weight: bold;">Our model runs on CPU on HuggingFace Space. Actual inference is less than 100ms second per image on consumer-level GPUs, on Spaces will take between 20s and 90s, depending on the "Speed-Resolution Tradeoff" chosen and the first inference is slower (downloading model). Web-based 3D pointcloud visualization may be slow due to Gradio's rendering. For faster visualization, use a local machine to run our demo from our <a href="https://github.com/lpiccinelli-eth/UniK3D">GitHub repository</a>. </span></p>
         </div>
         """
         )
                 10.0,
             ],
             [
+                "assets/demo/bears.jpg",
                 "Large",
                 "Predicted",
                 0.0,
                 10.0,
             ],
             [
+                "assets/demo/scannet.jpg",
                 "Large",
                 "Fisheye624",
                 791.90869140625,

unik3d/models/unik3d.py CHANGED Viewed

@@ -21,9 +21,6 @@ from unik3d.utils.constants import IMAGENET_DATASET_MEAN, IMAGENET_DATASET_STD
 from unik3d.utils.distributed import is_main_process
 from unik3d.utils.misc import get_params, last_stack, match_gt
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-ENABLED = torch.cuda.is_available()
 def orthonormal_init(num_tokens, dims):
     pe = torch.randn(num_tokens, dims)
@@ -276,7 +273,6 @@ class UniK3D(
         return losses
     @torch.no_grad()
-    @torch.autocast(device_type=DEVICE, enabled=ENABLED, dtype=torch.float16)
     def infer(
         self,
         rgb: torch.Tensor,

 from unik3d.utils.distributed import is_main_process
 from unik3d.utils.misc import get_params, last_stack, match_gt
 def orthonormal_init(num_tokens, dims):
     pe = torch.randn(num_tokens, dims)
         return losses
     @torch.no_grad()
     def infer(
         self,
         rgb: torch.Tensor,