Stable-X committed
Commit 2c5f88b · 1 parent: 2cc5b1b

feat: Add rendering output and refinement flag

Files changed (1): app.py (+94 −40)
app.py CHANGED
@@ -16,12 +16,13 @@ from transformers import AutoModelForImageSegmentation
 from torchvision import transforms
 from PIL import Image
 import open3d as o3d
+from spann3r.tools.vis import render_frames
 from backend_utils import improved_multiway_registration, pts2normal, point2mesh, combine_and_clean_point_clouds
 from gs_utils import point2gs
+from pose_utils import solve_cemara
 from gradio.helpers import Examples as GradioExamples
 from gradio.utils import get_cache_folder
 from pathlib import Path
-
 # Default values
 DEFAULT_CKPT_PATH = './checkpoints/spann3r.pth'
 DEFAULT_DUST3R_PATH = 'https://huggingface.co/camenduru/dust3r/resolve/main/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth'
@@ -209,9 +210,47 @@ def center_pcd(pcd: o3d.geometry.PointCloud, normalize=False) -> o3d.geometry.PointCloud:
     pcd.points = o3d.utility.Vector3dVector(centered_points)
     return pcd
 
+def center_mesh(mesh: o3d.geometry.TriangleMesh, normalize=False) -> o3d.geometry.TriangleMesh:
+    # Convert to numpy array
+    vertices = np.asarray(mesh.vertices)
+
+    # Compute centroid
+    centroid = np.mean(vertices, axis=0)
+
+    # Center the mesh
+    centered_vertices = vertices - centroid
+
+    if normalize:
+        # Compute the maximum distance from the center
+        max_distance = np.max(np.linalg.norm(centered_vertices, axis=1))
+
+        # Normalize the mesh
+        normalized_vertices = centered_vertices / max_distance
+
+        # Create a new mesh with the normalized vertices
+        normalized_mesh = o3d.geometry.TriangleMesh()
+        normalized_mesh.vertices = o3d.utility.Vector3dVector(normalized_vertices)
+        normalized_mesh.triangles = mesh.triangles
+
+        # If the original mesh has vertex colors, copy them
+        if mesh.has_vertex_colors():
+            normalized_mesh.vertex_colors = mesh.vertex_colors
+
+        # If the original mesh has vertex normals, normalize them
+        if mesh.has_vertex_normals():
+            vertex_normals = np.asarray(mesh.vertex_normals)
+            normalized_vertex_normals = vertex_normals / np.linalg.norm(vertex_normals, axis=1, keepdims=True)
+            normalized_mesh.vertex_normals = o3d.utility.Vector3dVector(normalized_vertex_normals)
+
+        return normalized_mesh
+    else:
+        # Update the mesh with the centered vertices
+        mesh.vertices = o3d.utility.Vector3dVector(centered_vertices)
+        return mesh
+
 @torch.no_grad()
 def reconstruct(video_path, conf_thresh, kf_every,
-                remove_background=False):
+                remove_background=False, enable_registration=True, output_3d_model=True):
     # Extract frames from video
     demo_path = extract_frames(video_path)
 
@@ -234,6 +273,8 @@ def reconstruct(video_path, conf_thresh, kf_every,
 
     # Process results
     pcds = []
+    cameras_all = []
+    last_focal = None
     for j, view in enumerate(batch):
         image = view['img'].permute(0, 2, 3, 1).cpu().numpy()[0]
         image = (image + 1) / 2
@@ -248,28 +289,40 @@ def reconstruct(video_path, conf_thresh, kf_every,
 
         combined_mask = (conf_sig > conf_thresh) & (mask > 0.5)
 
+        camera, last_focal = solve_cemara(torch.tensor(pts), torch.tensor(conf_sig) > 0.001,
+                                          "cuda", focal=last_focal)
+
         pcd = o3d.geometry.PointCloud()
         pcd.points = o3d.utility.Vector3dVector(pts[combined_mask])
         pcd.colors = o3d.utility.Vector3dVector(image[combined_mask])
         pcd.normals = o3d.utility.Vector3dVector(pts_normal[combined_mask])
         pcds.append(pcd)
+        cameras_all.append(camera)
+
 
     pcd_combined = combine_and_clean_point_clouds(pcds, voxel_size=0.001)
-    pcd_combined = center_pcd(pcd_combined, normalize=True)
     o3d_geometry = point2mesh(pcd_combined)
-
-    # Create coarse result
-    coarse_output_path = export_geometry(o3d_geometry)
-
-    yield coarse_output_path, None
-
-    transformed_pcds, _, _ = improved_multiway_registration(pcds, voxel_size=0.01)
-    transformed_pcds = center_pcd(transformed_pcds)
+    o3d_geometry_centered = center_mesh(o3d_geometry, normalize=True)
 
     # Create coarse result
-    refined_output_path = tempfile.mktemp(suffix='.ply')
-    point2gs(refined_output_path, transformed_pcds)
-    yield coarse_output_path, refined_output_path
+    coarse_output_path = export_geometry(o3d_geometry_centered)
+    yield coarse_output_path, None
+
+    gs_output_path = tempfile.mktemp(suffix='.ply')
+    if enable_registration:
+        transformed_pcds, _, _ = improved_multiway_registration(pcds, voxel_size=0.01)
+        transformed_pcds = center_pcd(transformed_pcds)
+        point2gs(gs_output_path, transformed_pcds)
+    else:
+        point2gs(gs_output_path, pcd_combined)
+
+    if output_3d_model:
+        # Create 3D model result using gaussian splatting
+        yield coarse_output_path, gs_output_path
+    else:
+        gs_output_path = tempfile.mktemp(suffix='.ply')
+        render_video_path = render_frames(o3d_geometry, cameras_all, demo_path)
+        yield coarse_output_path, render_video_path
 
     # Clean up temporary directory
     os.system(f"rm -rf {demo_path}")
@@ -345,13 +398,26 @@ with gr.Blocks(
             kf_every = gr.Slider(1, 30, step=1, value=1, label="Keyframe Interval")
             with gr.Row():
                 remove_background = gr.Checkbox(label="Remove Background", value=False)
+                enable_registration = gr.Checkbox(
+                    label="Enable Refinement",
+                    value=False,
+                    info="Improves alignment but takes longer"
+                )
+                output_3d_model = gr.Checkbox(
+                    label="Output Splat",
+                    value=True,
+                    info="Generate Splat (PLY) instead of video render"
+                )
             reconstruct_btn = gr.Button("Start Reconstruction")
 
         with gr.Column(scale=2):
             with gr.Tab("3D Models"):
                 with gr.Group():
-                    initial_model = gr.Model3D(label="Initial 3D Model", display_mode="solid",
-                                               clear_color=[0.0, 0.0, 0.0, 0.0])
+                    initial_model = gr.Model3D(
+                        label="Initial 3D Model",
+                        display_mode="solid",
+                        clear_color=[0.0, 0.0, 0.0, 0.0]
+                    )
                     gr.Markdown(
                         """
                         <div class="model-description">
@@ -361,33 +427,21 @@ with gr.Blocks(
                     )
 
                 with gr.Group():
-                    optimized_model = gr.Model3D(label="Optimized 3D Model", display_mode="solid",
-                                                 clear_color=[0.0, 0.0, 0.0, 0.0])
+                    output_model = gr.File(
+                        label="Refined Result (Splat or Video)",
+                        file_types=[".ply", ".mp4"],
+                        file_count="single"
+                    )
                     gr.Markdown(
                         """
                         <div class="model-description">
-                        This is the optimized 3D model with improved accuracy and detail using Gaussian Splatting. Finish within 60 seconds.
+                        Downloads as either:
+                        - PLY file: Gaussian Splat model (when "Output Splat" is enabled)
+                        - MP4 file: 360° rotating render video (when "Output Splat" is disabled)
+                        <br>Time: ~60 seconds with refinement, ~30 seconds without
                         </div>
                         """
                     )
-
-            with gr.Tab("Help"):
-                gr.Markdown(
-                    """
-                    ## How to use this tool:
-                    1. Upload a video of the object you want to reconstruct.
-                    2. Adjust the Confidence Threshold and Keyframe Interval if needed.
-                    3. Choose whether to remove the background.
-                    4. Click "Start Reconstruction" to begin the process.
-                    5. The Initial 3D Model will appear first, giving you a quick preview.
-                    6. Once processing is complete, the Optimized 3D Model will show the final result.
-
-                    ### Tips:
-                    - For best results, ensure your video captures the object from multiple angles.
-                    - If the model appears noisy, try increasing the Confidence Threshold.
-                    - Experiment with different Keyframe Intervals to balance speed and accuracy.
-                    """
-                )
 
     Examples(
         fn=reconstruct,
@@ -396,15 +450,15 @@ with gr.Blocks(
             for name in os.listdir(os.path.join("examples")) if name.endswith('.webm')
         ]),
         inputs=[video_input],
-        outputs=[initial_model, optimized_model],
+        outputs=[initial_model, output_model],
        directory_name="examples_video",
         cache_examples=False,
     )
 
     reconstruct_btn.click(
         fn=reconstruct,
-        inputs=[video_input, conf_thresh, kf_every, remove_background],
-        outputs=[initial_model, optimized_model]
+        inputs=[video_input, conf_thresh, kf_every, remove_background, enable_registration, output_3d_model],
+        outputs=[initial_model, output_model]
     )
 
 if __name__ == "__main__":
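A note on the new `center_mesh` helper above: it mirrors the existing `center_pcd`, translating vertices to their centroid and, with `normalize=True`, scaling the mesh into the unit ball (vertex colors are copied over and vertex normals re-normalized). A minimal sanity-check sketch, assuming `center_mesh` is importable from app.py; the Open3D box is just a stand-in input:

```python
# Hypothetical sanity check for center_mesh (not part of the commit).
import numpy as np
import open3d as o3d

from app import center_mesh  # assumes app.py is importable in this environment

mesh = o3d.geometry.TriangleMesh.create_box()             # 8 vertices in [0, 1]^3
centered = center_mesh(mesh, normalize=True)

v = np.asarray(centered.vertices)
assert np.allclose(v.mean(axis=0), 0.0, atol=1e-6)        # centroid moved to the origin
assert np.linalg.norm(v, axis=1).max() <= 1.0 + 1e-6      # scaled into the unit ball
```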
 