Spaces:

Stable-X
/

StableRecon

Runtime error

App Files Community

Stable-X committed on Nov 26, 2024

Commit

2bca3f5

1 Parent(s): 549d99a

feat: Clean codes

Browse files

Files changed (2) hide show

app.py +0 -91
spann3r/model.py +5 -2

app.py CHANGED Viewed

@@ -161,8 +161,6 @@ def load_model(ckpt_path, device):
     return model
 model = load_model(DEFAULT_CKPT_PATH, DEFAULT_DEVICE)
-mast3r_model = AsymmetricMASt3R.from_pretrained(DEFAULT_MAST3R_PATH).to(DEFAULT_DEVICE)
-mast3r_model.eval()
 birefnet = AutoModelForImageSegmentation.from_pretrained('zhengpeng7/BiRefNet', trust_remote_code=True)
 birefnet.to(DEFAULT_DEVICE)
@@ -386,87 +384,6 @@ def get_keyframes(temp_dir: str, kf_every: int = 10):
             raise ValueError(f"Not enough frames found in {temp_dir}. Need at least 2 frames for reconstruction.")
     return keyframe_paths
-from mast3r.cloud_opt.sparse_ga import sparse_global_alignment
-from mast3r.cloud_opt.tsdf_optimizer import TSDFPostProcess
-from dust3r.utils.image import load_images
-from dust3r.image_pairs import make_pairs
-from dust3r.utils.device import to_numpy
-def invert_matrix(mat):
-    """Invert a torch or numpy matrix."""
-    if isinstance(mat, torch.Tensor):
-        return torch.linalg.inv(mat)
-    if isinstance(mat, np.ndarray):
-        return np.linalg.inv(mat)
-    raise ValueError(f'Unsupported matrix type: {type(mat)}')
-def refine(
-    video_path: str,
-    conf_thresh: float = 5.0,
-    kf_every: int = 30,
-    remove_background: bool = False,
-    enable_registration: bool = True,
-    output_3d_model: bool = True
-) -> dict:
-    # Extract keyframes from video
-    temp_dir = extract_frames(video_path)
-    keyframe_paths = get_keyframes(temp_dir, kf_every*3)
-    image_size = 512
-    images = load_images(keyframe_paths, size=image_size)
-    # Create output directory
-    output_dir = tempfile.mkdtemp()
-    # Generate pairs and run inference
-    pairs = make_pairs(images, scene_graph='complete', prefilter=None, symmetrize=True)
-    cache_dir = os.path.join(output_dir, 'cache')
-    if os.path.exists(cache_dir):
-        os.system(f'rm -rf {cache_dir}')
-    scene = sparse_global_alignment(keyframe_paths, pairs, cache_dir,
-                                    mast3r_model, lr1=0.07, niter1=500, lr2=0.014,
-                                    niter2=200 if enable_registration else 0, device=DEFAULT_DEVICE,
-                                    opt_depth=True if enable_registration else False, shared_intrinsics=True,
-                                    matching_conf_thr=5.)
-    # Extract scene information
-    imgs = np.array(scene.imgs)
-    tsdf = TSDFPostProcess(scene, TSDF_thresh=0)
-    pts3d, _, confs = tsdf.get_dense_pts3d(clean_depth=True)
-    masks = np.array(to_numpy([c > 1.5 for c in confs]))
-    pcds = []
-    for pts, conf_mask, image in zip(pts3d, masks, imgs):
-        if remove_background:
-            mask = generate_mask(image)
-        else:
-            mask = np.ones_like(conf_mask)
-        combined_mask = conf_mask & (mask > 0.5)
-        pts = pts.reshape(combined_mask.shape[0], combined_mask.shape[1], 3)
-        pts_normal = pts2normal(pts).cpu().numpy()
-        pts = pts.cpu().numpy()
-        pcd = o3d.geometry.PointCloud()
-        pcd.points = o3d.utility.Vector3dVector(pts[combined_mask] / 5)
-        pcd.colors = o3d.utility.Vector3dVector(image[combined_mask])
-        pcd.normals = o3d.utility.Vector3dVector(pts_normal[combined_mask])
-        pcds.append(pcd)
-    pcd_combined = combine_and_clean_point_clouds(pcds, voxel_size=0.001)
-    o3d_geometry = point2mesh(pcd_combined, depth=9)
-    o3d_geometry_centered = center_mesh(o3d_geometry, normalize=True)
-    # Create coarse result
-    coarse_output_path = export_geometry(o3d_geometry_centered)
-    if output_3d_model:
-        gs_output_path = tempfile.mktemp(suffix='.ply')
-        point2gs(gs_output_path, pcd_combined)
-        return coarse_output_path, [gs_output_path]
-    else:
-        pcd_output_path = export_geometry(pcd_combined, file_format='ply')
-        return coarse_output_path, [pcd_output_path]
 @torch.no_grad()
 def reconstruct(video_path, conf_thresh, kf_every,
@@ -661,7 +578,6 @@ with gr.Blocks(
                     info="Generate Splat (PLY) instead of Point Cloud (PLY)"
                 )
             reconstruct_btn = gr.Button("Start Reconstruction")
-            refine_btn = gr.Button("Start Refinement")
         with gr.Column(scale=2):
             with gr.Tab("3D Models"):
@@ -695,12 +611,5 @@ with gr.Blocks(
         outputs=[initial_model, output_model]
     )
-    refine_btn.click(
-        fn=refine,
-        inputs=[video_input, conf_thresh, kf_every, remove_background, enable_registration, output_3d_model],
-        outputs=[initial_model, output_model]
-    )
 if __name__ == "__main__":
     iface.launch(server_name="0.0.0.0")

     return model
 model = load_model(DEFAULT_CKPT_PATH, DEFAULT_DEVICE)
 birefnet = AutoModelForImageSegmentation.from_pretrained('zhengpeng7/BiRefNet', trust_remote_code=True)
 birefnet.to(DEFAULT_DEVICE)
             raise ValueError(f"Not enough frames found in {temp_dir}. Need at least 2 frames for reconstruction.")
     return keyframe_paths
 @torch.no_grad()
 def reconstruct(video_path, conf_thresh, kf_every,
                     info="Generate Splat (PLY) instead of Point Cloud (PLY)"
                 )
             reconstruct_btn = gr.Button("Start Reconstruction")
         with gr.Column(scale=2):
             with gr.Tab("3D Models"):
         outputs=[initial_model, output_model]
     )
 if __name__ == "__main__":
     iface.launch(server_name="0.0.0.0")

spann3r/model.py CHANGED Viewed

@@ -201,7 +201,7 @@ class SpatialMemory():
         print('Memory pruned:', num_mem_b, '->', num_mem_a)
 class Spann3R(nn.Module):
     def __init__(self, dus3r_name="./checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth",
                  use_feat=False, mem_pos_enc=False, memory_dropout=0.15):
@@ -211,7 +211,10 @@ class Spann3R(nn.Module):
         self.mem_pos_enc = mem_pos_enc
         # DUSt3R
-        self.dust3r = AsymmetricCroCo3DStereo.from_pretrained(dus3r_name, landscape_only=True)
         # Memory encoder
         self.set_memory_encoder(enc_embed_dim=768 if use_feat else 1024, memory_dropout=memory_dropout)

         print('Memory pruned:', num_mem_b, '->', num_mem_a)
+import math
 class Spann3R(nn.Module):
     def __init__(self, dus3r_name="./checkpoints/DUSt3R_ViTLarge_BaseDecoder_512_dpt.pth",
                  use_feat=False, mem_pos_enc=False, memory_dropout=0.15):
         self.mem_pos_enc = mem_pos_enc
         # DUSt3R
+        self.dust3r = AsymmetricCroCo3DStereo(enc_depth=24, dec_depth=12, enc_embed_dim=1024, dec_embed_dim=768,
+                    enc_num_heads=16, dec_num_heads=12, pos_embed='RoPE100', patch_embed_cls='PatchEmbedDust3R',
+                    img_size=(512, 512), head_type='dpt', output_mode='pts3d', depth_mode=('exp', -math.inf, math.inf),
+                    conf_mode=('exp', 1, math.inf), landscape_only=True)
         # Memory encoder
         self.set_memory_encoder(enc_embed_dim=768 if use_feat else 1024, memory_dropout=memory_dropout)