hujiecpp committed
Commit 38a29f3 · 1 Parent(s): da2ad26

init project

Files changed (2)
  1. app.py +12 -5
  2. modules/pe3r/models.py +3 -3
app.py CHANGED
@@ -37,6 +37,8 @@ from modules.mobilesamv2.utils.transforms import ResizeLongestSide
 from modules.pe3r.models import Models
 import torchvision.transforms as tvf

+from transformers import AutoTokenizer, AutoModel, AutoProcessor, SamModel
+
 silent = False
 # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 pe3r = Models('cpu') #
@@ -304,6 +306,10 @@ def get_cog_feats(images):

     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     pe3r.sam2.to(device)
+
+    siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+    siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+
     # pe3r.siglip_processor.to(device)
     # pe3r.siglip.to(device)

@@ -399,10 +405,10 @@ def get_cog_feats(images):
     seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3
     seg_imgs = torch.from_numpy(seg_imgs).permute(0,3,1,2) # / 255.0

-    inputs = pe3r.siglip_processor(images=seg_imgs, return_tensors="pt")
+    inputs = siglip_processor(images=seg_imgs, return_tensors="pt")
     inputs = {key: value.to(device) for key, value in inputs.items()}

-    image_features = pe3r.siglip.get_image_features(**inputs)
+    image_features = siglip.get_image_features(**inputs)
     image_features = image_features / image_features.norm(dim=-1, keepdim=True)
     image_features = image_features.detach().cpu()

@@ -519,13 +525,14 @@ def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_po

     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     # pe3r.siglip_tokenizer.to(device)
-
+    siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+    siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)

     texts = [text]
-    inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
+    inputs = siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
     inputs = {key: value.to(device) for key, value in inputs.items()}
     with torch.no_grad():
-        text_feats =pe3r.siglip.get_text_features(**inputs)
+        text_feats =siglip.get_text_features(**inputs)
     text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
     scene.render_image(text_feats, threshold)
     scene.ori_imgs = scene.rendered_imgs
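
Taken together, the app.py changes stop relying on the SigLIP handles that used to live on the pe3r Models object and instead load the model directly inside get_cog_feats and get_3D_object_from_scene, on whichever device is available. The standalone sketch below shows that usage pattern end to end. The checkpoint id, the processor/tokenizer calls, and get_image_features / get_text_features come from the diff above; the dummy segment batch, the example query, and the final similarity step are illustrative additions, not part of this commit.

import numpy as np
import torch
from transformers import AutoModel, AutoProcessor, AutoTokenizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Loading mirrors the diff (device_map=device requires accelerate).
siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256")
siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256")

# Image side: a batch of segment crops, b,H,W,3 uint8, permuted to b,3,H,W as in the diff.
seg_imgs = np.stack([np.zeros((256, 256, 3), dtype=np.uint8)] * 2, axis=0)  # hypothetical crops
seg_imgs = torch.from_numpy(seg_imgs).permute(0, 3, 1, 2)
inputs = siglip_processor(images=seg_imgs, return_tensors="pt")
inputs = {key: value.to(device) for key, value in inputs.items()}
with torch.no_grad():
    image_features = siglip.get_image_features(**inputs)
image_features = image_features / image_features.norm(dim=-1, keepdim=True)  # L2-normalize

# Text side: tokenize a query and embed it the same way.
texts = ["a red chair"]  # hypothetical query
inputs = siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
inputs = {key: value.to(device) for key, value in inputs.items()}
with torch.no_grad():
    text_feats = siglip.get_text_features(**inputs)
text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)

# Cosine similarity between each segment embedding and the query embedding.
similarity = image_features @ text_feats.T
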
modules/pe3r/models.py CHANGED
@@ -47,6 +47,6 @@ class Models:
         self.yolov8 = ObjectAwareModel(YOLO8_CKP)

         # -- siglip --
-        self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
-        self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
-        self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+        # self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+        # self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+        # self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
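
The models.py side of the commit only comments out the eager SigLIP construction in Models.__init__, so the CPU-initialized pe3r object no longer pulls the checkpoint at startup. As written, each call to get_cog_feats or get_3D_object_from_scene reloads SigLIP from the local hub cache. If that ever became a bottleneck, one optional follow-up pattern (not part of this commit; the helper name is hypothetical) is a cached loader:

from functools import lru_cache

import torch
from transformers import AutoModel, AutoProcessor, AutoTokenizer

@lru_cache(maxsize=None)
def load_siglip(device: str):
    """Load SigLIP once per device string and reuse it on later calls (hypothetical helper)."""
    model = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
    processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256")
    tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256")
    return model, processor, tokenizer

# Usage inside the app functions would mirror the diff:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
siglip, siglip_processor, siglip_tokenizer = load_siglip(device)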