Jie Hu committed
Commit 43b0caa · Parent: 1d77203

init project

Files changed (1): app.py (+29, -24)
--- a/app.py
+++ b/app.py
@@ -39,6 +39,8 @@ import torchvision.transforms as tvf
 
 
 silent = False
+pe3r = Models('cuda' if torch.cuda.is_available() else 'cpu')
+
 
 def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
                                  cam_color=None, as_pointcloud=False,
@@ -81,6 +83,7 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world,
     if not silent:
         print('(exporting 3D scene to', outfile, ')')
     # scene.export(file_obj=outfile)
+    print('ttttt')
     return outfile
 
 # @spaces.GPU(duration=180)
@@ -242,7 +245,6 @@ def slerp_multiple(vectors, t_values):
     return interpolated_vector
 
 @torch.no_grad
-@spaces.GPU(duration=180)
 def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
@@ -297,24 +299,9 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
 
     return ret_mask
 
-@spaces.GPU(duration=180)
-def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
-                            as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
-                            scenegraph_type, winsize, refid):
-    """
-    from a list of images, run dust3r inference, global aligner.
-    then run get_3D_model_from_scene
-    """
-    if len(filelist) < 2:
-        raise gradio.Error("Please input at least 2 images.")
-
+@torch.no_grad
+def get_cog_feats(images):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
-    pe3r = Models(device)
-
-    images = Images(filelist=filelist, device=device)
-
-    # try:
     cog_seg_maps = []
     rev_cog_seg_maps = []
     inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
@@ -447,8 +434,25 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
         multi_view_clip_feats[i] = torch.zeros((1024))
     multi_view_clip_feats[mask_num] = torch.zeros((1024))
 
-    cog_feats = multi_view_clip_feats
+    return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
 
+@spaces.GPU(duration=180)
+def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
+                            as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
+                            scenegraph_type, winsize, refid):
+    """
+    from a list of images, run dust3r inference, global aligner.
+    then run get_3D_model_from_scene
+    """
+    if len(filelist) < 2:
+        raise gradio.Error("Please input at least 2 images.")
+
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+    images = Images(filelist=filelist, device=device)
+
+    # try:
+    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images)
     imgs = load_images(images, rev_cog_seg_maps, size=512, verbose=not silent)
     # except Exception as e:
     #     rev_cog_seg_maps = []
@@ -495,10 +499,11 @@ def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
         scene.ori_imgs = ori_imgs
         print(e)
 
+    print('a')
 
    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
                                      clean_depth, transparent_cams, cam_size)
-
+    print('b')
     # also return rgb, depth and confidence imgs
     # depth is normalized with the max value for all images
     # we apply the jet colormap on the confidence maps
@@ -603,11 +608,11 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
             clean_depth = gradio.Checkbox(value=True, label="Clean-up depthmaps", visible=False)
             transparent_cams = gradio.Checkbox(value=True, label="Transparent cameras", visible=False)
 
-            with gradio.Row():
-                text_input = gradio.Textbox(label="Query Text")
-                threshold = gradio.Slider(label="Threshold", value=0.85, minimum=0.0, maximum=1.0, step=0.01)
+            # with gradio.Row():
+            #     text_input = gradio.Textbox(label="Query Text")
+            #     threshold = gradio.Slider(label="Threshold", value=0.85, minimum=0.0, maximum=1.0, step=0.01)
 
-            find_btn = gradio.Button("Find")
+            # find_btn = gradio.Button("Find")
 
             outmodel = gradio.Model3D()
             # outgallery = gradio.Gallery(label='rgb,depth,confidence', columns=3, height="100%",
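Taken together, the hunks move model construction to module scope (`pe3r = Models(...)`), factor the segmentation/CLIP feature extraction out into `get_cog_feats`, and leave `@spaces.GPU` only on the single `get_reconstructed_scene` entry point. Below is a minimal, self-contained sketch of that structure, assuming the `spaces` package available in Hugging Face ZeroGPU Spaces; the model and helper names are illustrative stand-ins, not the PE3R code.

import torch
import spaces  # Hugging Face ZeroGPU helper, present in Spaces runtimes

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = torch.nn.Linear(8, 4).to(device)  # stand-in for `pe3r = Models(device)` at module scope

@torch.no_grad()
def get_features(x):
    # Undecorated helper, analogous to get_cog_feats: it runs inside the
    # caller's GPU allocation rather than requesting one of its own.
    return model(x)

@spaces.GPU(duration=180)  # the one entry point that requests a GPU slot
def run(x):
    return get_features(x.to(device))

if __name__ == '__main__':
    print(run(torch.randn(2, 8)).shape)  # torch.Size([2, 4])

Keeping the decorator on only the outermost function bounds the whole request by one 180-second allocation, which is consistent with this commit dropping the per-helper `@spaces.GPU` decorators.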