Jie Hu committed
Commit 43b0caa · Parent: 1d77203

init project

Files changed (1): app.py (+29, -24)
--- a/app.py
+++ b/app.py
@@ -39,6 +39,8 @@ import torchvision.transforms as tvf
 
 
 silent = False
+pe3r = Models('cuda' if torch.cuda.is_available() else 'cpu')
+
 
 def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
                                  cam_color=None, as_pointcloud=False,
@@ -81,6 +83,7 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world,
     if not silent:
         print('(exporting 3D scene to', outfile, ')')
     # scene.export(file_obj=outfile)
+    print('ttttt')
     return outfile
 
 # @spaces.GPU(duration=180)
@@ -242,7 +245,6 @@ def slerp_multiple(vectors, t_values):
     return interpolated_vector
 
 @torch.no_grad
-@spaces.GPU(duration=180)
 def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
@@ -297,24 +299,9 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
 
     return ret_mask
 
-@spaces.GPU(duration=180)
-def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
-                            as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
-                            scenegraph_type, winsize, refid):
-    """
-    from a list of images, run dust3r inference, global aligner.
-    then run get_3D_model_from_scene
-    """
-    if len(filelist) < 2:
-        raise gradio.Error("Please input at least 2 images.")
-
+@torch.no_grad
+def get_cog_feats(images):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
-    pe3r = Models(device)
-
-    images = Images(filelist=filelist, device=device)
-
-    # try:
     cog_seg_maps = []
     rev_cog_seg_maps = []
     inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
@@ -447,8 +434,25 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
         multi_view_clip_feats[i] = torch.zeros((1024))
     multi_view_clip_feats[mask_num] = torch.zeros((1024))
 
-    cog_feats = multi_view_clip_feats
+    return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
 
+@spaces.GPU(duration=180)
+def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
+                            as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
+                            scenegraph_type, winsize, refid):
+    """
+    from a list of images, run dust3r inference, global aligner.
+    then run get_3D_model_from_scene
+    """
+    if len(filelist) < 2:
+        raise gradio.Error("Please input at least 2 images.")
+
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    images = Images(filelist=filelist, device=device)
+
+    # try:
+    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images)
     imgs = load_images(images, rev_cog_seg_maps, size=512, verbose=not silent)
     # except Exception as e:
     #     rev_cog_seg_maps = []
@@ -495,10 +499,11 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
         scene.ori_imgs = ori_imgs
         print(e)
 
+    print('a')
 
    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
                                      clean_depth, transparent_cams, cam_size)
-
+    print('b')
     # also return rgb, depth and confidence imgs
     # depth is normalized with the max value for all images
     # we apply the jet colormap on the confidence maps
@@ -603,11 +608,11 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
             clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps", visible=False)
             transparent_cams = gradio.Checkbox(value=True, label="Transparent cameras", visible=False)
 
-            with gradio.Row():
-                text_input = gradio.Textbox(label="Query Text")
-                threshold = gradio.Slider(label="Threshold", value=0.85, minimum=0.0, maximum=1.0, step=0.01)
+            # with gradio.Row():
+            #     text_input = gradio.Textbox(label="Query Text")
+            #     threshold = gradio.Slider(label="Threshold", value=0.85, minimum=0.0, maximum=1.0, step=0.01)
 
-            find_btn = gradio.Button("Find")
+            # find_btn = gradio.Button("Find")
 
             outmodel = gradio.Model3D()
             # outgallery = gradio.Gallery(label='rgb,depth,confidence', columns=3, height="100%",
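Taken together, the hunks move model construction to module scope (`pe3r = Models(...)`), factor the segmentation/CLIP feature extraction out into `get_cog_feats`, and leave `@spaces.GPU` only on the single `get_reconstructed_scene` entry point. Below is a minimal, self-contained sketch of that structure, assuming the `spaces` package available in Hugging Face ZeroGPU Spaces; the model and helper names are illustrative stand-ins, not the PE3R code.

import torch
import spaces  # Hugging Face ZeroGPU helper, present in Spaces runtimes

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = torch.nn.Linear(8, 4).to(device)  # stand-in for `pe3r = Models(device)` at module scope

@torch.no_grad()
def get_features(x):
    # Undecorated helper, analogous to get_cog_feats: it runs inside the
    # caller's GPU allocation rather than requesting one of its own.
    return model(x)

@spaces.GPU(duration=180)  # the one entry point that requests a GPU slot
def run(x):
    return get_features(x.to(device))

if __name__ == '__main__':
    print(run(torch.randn(2, 8)).shape)  # torch.Size([2, 4])

Keeping the decorator on only the outermost function bounds the whole request by one 180-second allocation, which is consistent with this commit dropping the per-helper `@spaces.GPU` decorators.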