init project
modules/pe3r/demo.py (+6 -6) CHANGED
@@ -244,13 +244,13 @@ def get_mask_from_img_sam1(mobilesamv2, yolov8, sam1_image, yolov8_image, origin
     input_boxes1 = obj_results[0].boxes.xyxy
     input_boxes1 = input_boxes1.cpu().numpy()
     input_boxes1 = transform.apply_boxes(input_boxes1, original_size)
-    input_boxes = torch.from_numpy(input_boxes1).
+    input_boxes = torch.from_numpy(input_boxes1).to(device)
 
     # obj_results = yolov8(yolov8_image,device=device,retina_masks=False,imgsz=512,conf=0.25,iou=0.9,verbose=False)
     # input_boxes2 = obj_results[0].boxes.xyxy
     # input_boxes2 = input_boxes2.cpu().numpy()
     # input_boxes2 = transform.apply_boxes(input_boxes2, original_size)
-    # input_boxes2 = torch.from_numpy(input_boxes2).
+    # input_boxes2 = torch.from_numpy(input_boxes2).to(device)
 
     # input_boxes = torch.cat((input_boxes1, input_boxes2), dim=0)
 
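Note on this hunk: the removed line was truncated at a dangling `.`, and the commit completes it with `.to(device)`. A minimal, self-contained sketch of the round-trip being repaired, with synthetic box values and the `transform.apply_boxes` rescaling step stubbed out (it needs a real SAM transform object):

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Stand-in for obj_results[0].boxes.xyxy from YOLOv8:
# an N x 4 tensor of (x1, y1, x2, y2) boxes on the model's device.
input_boxes1 = torch.tensor([[10.0, 20.0, 110.0, 220.0]], device=device)

# SAM's apply_boxes works on NumPy arrays, so the boxes are pulled to the CPU...
input_boxes1 = input_boxes1.cpu().numpy()

# ...and must be moved back onto the model's device afterwards -- the
# `.to(device)` call this commit restores.
input_boxes = torch.from_numpy(input_boxes1).to(device)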
@@ -383,7 +383,7 @@ def get_cog_feats(images, pe3r, device):
     seg_imgs = torch.from_numpy(seg_imgs).permute(0,3,1,2) # / 255.0
 
     inputs = pe3r.siglip_processor(images=seg_imgs, return_tensors="pt")
-    inputs = {key: value.to(
+    inputs = {key: value.to(device) for key, value in inputs.items()}
 
     image_features = pe3r.siglip.get_image_features(**inputs)
     image_features = image_features / image_features.norm(dim=-1, keepdim=True)
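This hunk and the tokenizer hunk below restore the same idiom: the SigLIP processor returns a dict-like batch of CPU tensors, and each value has to be moved to the model's device before the forward pass. A minimal sketch with a stand-in batch (the real code builds `inputs` from `pe3r.siglip_processor`):

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

# Stand-in for pe3r.siglip_processor(images=seg_imgs, return_tensors="pt"),
# which yields a dict-like batch of CPU tensors such as "pixel_values".
inputs = {"pixel_values": torch.randn(2, 3, 224, 224)}

# Move every tensor onto the model's device before get_image_features(**inputs).
inputs = {key: value.to(device) for key, value in inputs.items()}

Transformers' batch objects also expose a `.to(device)` method, so `inputs.to(device)` would likely work as a one-liner; the dict comprehension used here has the same effect and keeps `inputs` a plain dict.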
@@ -512,12 +512,12 @@ def get_reconstructed_scene(outdir, pe3r, device, silent, filelist, schedule, ni
     return scene, outfile, imgs
 
 @spaces.GPU(duration=180)
-def get_3D_object_from_scene(outdir, pe3r, silent, text, threshold, scene, min_conf_thr, as_pointcloud,
+def get_3D_object_from_scene(outdir, pe3r, silent, device, text, threshold, scene, min_conf_thr, as_pointcloud,
                              mask_sky, clean_depth, transparent_cams, cam_size):
 
     texts = [text]
     inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
-    inputs = {key: value.to(
+    inputs = {key: value.to(device) for key, value in inputs.items()}
     with torch.no_grad():
         text_feats =pe3r.siglip.get_text_features(**inputs)
         text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
@@ -563,7 +563,7 @@ def main_demo(tmpdirname, server_name, server_port, silent=False):
 
     recon_fun = functools.partial(get_reconstructed_scene, tmpdirname, pe3r, device, silent)
     model_from_scene_fun = functools.partial(get_3D_model_from_scene, tmpdirname, silent)
-    get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname, pe3r, silent)
+    get_3D_object_from_scene_fun = functools.partial(get_3D_object_from_scene, tmpdirname, pe3r, silent, device)
 
     with gradio.Blocks(css=""".gradio-container {margin: 0 !important; min-width: 100%};""", title="PE3R Demo") as demo:
     # scene state is save so that you can change conf_thr, cam_size... without rerunning the inference
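The last two hunks have to stay in sync: `functools.partial` binds positional arguments left to right, so the new `device` parameter must occupy the same slot in the signature (fourth, right after `silent`) as in the partial call. A trimmed sketch of that contract with placeholder values:

import functools

def get_3D_object_from_scene(outdir, pe3r, silent, device, text, threshold):
    # outdir, pe3r, silent, and device are pre-bound by partial();
    # Gradio supplies the remaining UI-driven arguments.
    print(f"querying {text!r} at threshold {threshold} on {device}")

fun = functools.partial(get_3D_object_from_scene, "/tmp/out", None, True, "cpu")
fun("chair", 0.85)  # only text and threshold are left to pass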