hujiecpp committed
Commit 2a23e85 · 1 Parent(s): 564a5c5

init project

Files changed (1): app.py +54 -54
app.py CHANGED
@@ -45,7 +45,7 @@ pe3r = Models(device)
 
 def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world, cam_size=0.05,
                                  cam_color=None, as_pointcloud=False,
-                                 transparent_cams=False, silent=False):
+                                 transparent_cams=False):
     assert len(pts3d) == len(mask) <= len(imgs) <= len(cams2world) == len(focals)
     pts3d = to_numpy(pts3d)
     imgs = to_numpy(imgs)
@@ -87,7 +87,7 @@ def _convert_scene_output_to_glb(outdir, imgs, pts3d, mask, focals, cams2world,
     return outfile
 
 # # @spaces.GPU(duration=180)
-def get_3D_model_from_scene(outdir, silent, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False,
+def get_3D_model_from_scene(outdir, scene, min_conf_thr=3, as_pointcloud=False, mask_sky=False,
                             clean_depth=False, transparent_cams=False, cam_size=0.05):
     """
     extract 3D_model (glb file) from a reconstructed scene
@@ -245,7 +245,7 @@ def slerp_multiple(vectors, t_values):
     return interpolated_vector
 
 @torch.no_grad
-def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform, device):
+def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, original_size, input_size, transform):
     sam_mask=[]
     img_area = original_size[0] * original_size[1]
 
@@ -298,7 +298,7 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
     return ret_mask
 
 @torch.no_grad
-def get_cog_feats(images, device):
+def get_cog_feats(images):
     cog_seg_maps = []
     rev_cog_seg_maps = []
     inference_state = pe3r.sam2.init_state(images=images.sam2_images, video_height=images.sam2_video_size[0], video_width=images.sam2_video_size[1])
@@ -309,7 +309,7 @@ def get_cog_feats(images, device):
     np_images = images.np_images
     np_images_size = images.np_images_size
 
-    sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform, device)
+    sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[0], np_images[0], np_images_size[0], sam1_images_size[0], images.sam1_transform)
     for mask in sam1_masks:
         _, _, _ = pe3r.sam2.add_new_mask(
             inference_state=inference_state,
@@ -331,7 +331,7 @@ def get_cog_feats(images, device):
         if out_frame_idx == 0:
             continue
 
-        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform, device)
+        sam1_masks = get_mask_from_img_sam1(pe3r.mobilesamv2, pe3r.yolov8, sam1_images[out_frame_idx], np_images[out_frame_idx], np_images_size[out_frame_idx], sam1_images_size[out_frame_idx], images.sam1_transform)
 
         for sam1_mask in sam1_masks:
             flg = 1
@@ -434,7 +434,7 @@ def get_cog_feats(images, device):
     return cog_seg_maps, rev_cog_seg_maps, multi_view_clip_feats
 
 @spaces.GPU(duration=180)
-def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, min_conf_thr,
+def get_reconstructed_scene(outdir, filelist, schedule, niter, min_conf_thr,
                             as_pointcloud, mask_sky, clean_depth, transparent_cams, cam_size,
                             scenegraph_type, winsize, refid):
     """
@@ -447,7 +447,7 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
     images = Images(filelist=filelist, device=device)
 
     # try:
-    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images, device)
+    cog_seg_maps, rev_cog_seg_maps, cog_feats = get_cog_feats(images)
     imgs = load_images(images, rev_cog_seg_maps, size=512, verbose=not silent)
     # except Exception as e:
     #     rev_cog_seg_maps = []
@@ -495,7 +495,7 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
         print(e)
 
 
-    outfile = get_3D_model_from_scene(outdir, silent, scene, min_conf_thr, as_pointcloud, mask_sky,
+    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
                                       clean_depth, transparent_cams, cam_size)
 
     # also return rgb, depth and confidence imgs
@@ -519,21 +519,21 @@ def get_reconstructed_scene(outdir, device, silent, filelist, schedule, niter, m
 
     return scene, outfile, imgs
 
-# @spaces.GPU(duration=180)
-# def get_3D_object_from_scene(outdir, pe3r, silent, device, text, threshold, scene, min_conf_thr, as_pointcloud,
-#                              mask_sky, clean_depth, transparent_cams, cam_size):
+@spaces.GPU(duration=180)
+def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_pointcloud,
+                             mask_sky, clean_depth, transparent_cams, cam_size):
 
-#     texts = [text]
-#     inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
-#     inputs = {key: value.to(device) for key, value in inputs.items()}
-#     with torch.no_grad():
-#         text_feats =pe3r.siglip.get_text_features(**inputs)
-#     text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
-#     scene.render_image(text_feats, threshold)
-#     scene.ori_imgs = scene.rendered_imgs
-#     outfile = get_3D_model_from_scene(outdir, silent, scene, min_conf_thr, as_pointcloud, mask_sky,
-#                                       clean_depth, transparent_cams, cam_size)
-#     return outfile
+    texts = [text]
+    inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
+    inputs = {key: value.to(device) for key, value in inputs.items()}
+    with torch.no_grad():
+        text_feats =pe3r.siglip.get_text_features(**inputs)
+    text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
+    scene.render_image(text_feats, threshold)
+    scene.ori_imgs = scene.rendered_imgs
+    outfile = get_3D_model_from_scene(outdir, scene, min_conf_thr, as_pointcloud, mask_sky,
+                                      clean_depth, transparent_cams, cam_size)
+    return outfile
 
 
 def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
@@ -558,9 +558,9 @@ def set_scenegraph_options(inputfiles, winsize, refid, scenegraph_type):
 
 
 with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
-    recon_fun = functools.partial(get_reconstructed_scene, tmpdirname, device, silent)
-    # model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname, silent)
-    # get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname, pe3r, silent, device)
+    recon_fun = functools.partial(get_reconstructed_scene, tmpdirname)
+    model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname)
+    get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname)
 
     with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
         # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
@@ -622,32 +622,32 @@ with tempfile.TemporaryDirectory(suffix='pe3r_gradio_demo') as tmpdirname:
                                   mask_sky, clean_depth, transparent_cams, cam_size,
                                   scenegraph_type, winsize, refid],
                           outputs=[scene, outmodel, outgallery])
-            # min_conf_thr.release(fn=model_from_scene_fun,
-            #                      inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                              clean_depth, transparent_cams, cam_size],
-            #                      outputs=outmodel)
-            # cam_size.change(fn=model_from_scene_fun,
-            #                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                         clean_depth, transparent_cams, cam_size],
-            #                 outputs=outmodel)
-            # as_pointcloud.change(fn=model_from_scene_fun,
-            #                      inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                              clean_depth, transparent_cams, cam_size],
-            #                      outputs=outmodel)
-            # mask_sky.change(fn=model_from_scene_fun,
-            #                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                         clean_depth, transparent_cams, cam_size],
-            #                 outputs=outmodel)
-            # clean_depth.change(fn=model_from_scene_fun,
-            #                    inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                            clean_depth, transparent_cams, cam_size],
-            #                    outputs=outmodel)
-            # transparent_cams.change(model_from_scene_fun,
-            #                         inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                                 clean_depth, transparent_cams, cam_size],
-            #                         outputs=outmodel)
-            # find_btn.click(fn=get_3D_object_from_scene_fun,
-            #                inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
-            #                        clean_depth, transparent_cams, cam_size],
-            #                outputs=outmodel)
+            min_conf_thr.release(fn=model_from_scene_fun,
+                                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                         clean_depth, transparent_cams, cam_size],
+                                 outputs=outmodel)
+            cam_size.change(fn=model_from_scene_fun,
+                            inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                    clean_depth, transparent_cams, cam_size],
+                            outputs=outmodel)
+            as_pointcloud.change(fn=model_from_scene_fun,
+                                 inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                         clean_depth, transparent_cams, cam_size],
+                                 outputs=outmodel)
+            mask_sky.change(fn=model_from_scene_fun,
+                            inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                    clean_depth, transparent_cams, cam_size],
+                            outputs=outmodel)
+            clean_depth.change(fn=model_from_scene_fun,
+                               inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                       clean_depth, transparent_cams, cam_size],
+                               outputs=outmodel)
+            transparent_cams.change(model_from_scene_fun,
+                                    inputs=[scene, min_conf_thr, as_pointcloud, mask_sky,
+                                            clean_depth, transparent_cams, cam_size],
+                                    outputs=outmodel)
+            find_btn.click(fn=get_3D_object_from_scene_fun,
+                           inputs=[text_input, threshold, scene, min_conf_thr, as_pointcloud, mask_sky,
+                                   clean_depth, transparent_cams, cam_size],
+                           outputs=outmodel)
     demo.launch(show_error=True, share=None, server_name=None, server_port=None)
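
The common thread across these hunks is that `device`, `silent`, and `pe3r` leave the function signatures and are read as module-level globals instead, so the `@spaces.GPU`-decorated entry points only receive values that Gradio feeds in from UI components. A minimal sketch of that shape, assuming a no-op fallback decorator for local runs (the trimmed signature and names below are illustrative, not code from this commit):

```python
import torch

try:
    import spaces  # Hugging Face ZeroGPU helper, present on Spaces
except ImportError:
    class spaces:  # no-op stand-in so the sketch also runs locally
        @staticmethod
        def GPU(duration=60):
            def wrap(fn):
                return fn
            return wrap

# Module-level state, initialized once (mirrors device/silent/pe3r in app.py).
device = "cuda" if torch.cuda.is_available() else "cpu"
silent = False

@spaces.GPU(duration=180)
def get_reconstructed_scene(outdir, filelist):
    # Before this commit the function also took device and silent as
    # parameters; now it reads them from module scope and receives only
    # UI-supplied values.
    if not silent:
        print(f"reconstructing {len(filelist)} images on {device}")
```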
 
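The `functools.partial` bindings fix `tmpdirname` up front so each Gradio event handler only forwards component values. A self-contained sketch of that wiring, assuming a stub `rebuild_model` in place of `get_3D_model_from_scene` and a reduced set of inputs:

```python
import functools
import tempfile

import gradio

def rebuild_model(outdir, scene, min_conf_thr):
    # Stub standing in for get_3D_model_from_scene: re-export the model
    # from cached scene state whenever a viewing option changes.
    return f"{outdir}/scene_conf{min_conf_thr}.glb"

with tempfile.TemporaryDirectory(suffix="partial_demo") as tmpdirname:
    # outdir is bound here once; Gradio later supplies (scene, min_conf_thr).
    model_from_scene_fun = functools.partial(rebuild_model, tmpdirname)

    with gradio.Blocks() as demo:
        scene = gradio.State(None)
        min_conf_thr = gradio.Slider(0, 20, value=3, label="min_conf_thr")
        outmodel = gradio.Textbox(label="output path")
        # .release fires when the user lets go of the slider, matching the
        # min_conf_thr.release(...) hookup in the diff.
        min_conf_thr.release(fn=model_from_scene_fun,
                             inputs=[scene, min_conf_thr],
                             outputs=outmodel)
    demo.launch()
```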
 
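The newly enabled `get_3D_object_from_scene` embeds the query text with SigLIP, unit-normalizes the embedding, and hands it to `scene.render_image` for thresholding. A sketch of just the text-embedding step, assuming a public SigLIP checkpoint (the app keeps its own tokenizer and model on `pe3r`):

```python
import torch
from transformers import AutoModel, AutoTokenizer

# Assumed public checkpoint; app.py loads its own SigLIP into pe3r.
ckpt = "google/siglip-base-patch16-224"
tokenizer = AutoTokenizer.from_pretrained(ckpt)
model = AutoModel.from_pretrained(ckpt)

# SigLIP was trained with fixed-length padding, hence padding="max_length",
# just as in the diff above.
inputs = tokenizer(["a red chair"], padding="max_length", return_tensors="pt")
with torch.no_grad():
    text_feats = model.get_text_features(**inputs)

# Unit-normalize so dot products against normalized image/point features
# become cosine similarities, which render_image() can then threshold.
text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
```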