init project
Files changed:
- app.py (+12 -5)
- modules/pe3r/models.py (+3 -3)
app.py
CHANGED

@@ -37,6 +37,8 @@ from modules.mobilesamv2.utils.transforms import ResizeLongestSide
 from modules.pe3r.models import Models
 import torchvision.transforms as tvf
 
+from transformers import AutoTokenizer, AutoModel, AutoProcessor, SamModel
+
 silent = False
 # device = 'cuda' if torch.cuda.is_available() else 'cpu'
 pe3r = Models('cpu') #
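This hunk adds the transformers imports directly to app.py; of these, only the SigLIP classes appear in the hunks below. For context, a minimal standalone sketch of loading the same checkpoint (note that `device_map` only affects the model; processors and tokenizers hold no weights, so it is omitted for them here):

```python
# Sketch: loading the SigLIP checkpoint referenced by this commit.
# Assumes a transformers version with SigLIP support (>= 4.37) and
# accelerate installed (required by device_map).
import torch
from transformers import AutoModel, AutoProcessor, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"

siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256")
siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256")
```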
@@ -304,6 +306,10 @@ def get_cog_feats(images):
 
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     pe3r.sam2.to(device)
+
+    siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+    siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+
     # pe3r.siglip_processor.to(device)
     # pe3r.siglip.to(device)
 
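Because the hunk above constructs the model inside get_cog_feats, the SigLIP weights are re-loaded on every call. A hypothetical memoized loader (illustrative only; `_load_siglip` is not part of the commit) would pay that cost once per device while leaving the call sites unchanged:

```python
# Hypothetical alternative to per-call loading: cache the loaded
# model/processor keyed by the device string.
from functools import lru_cache

from transformers import AutoModel, AutoProcessor

@lru_cache(maxsize=None)
def _load_siglip(device: str):
    model = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
    processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256")
    return model, processor

# Usage inside get_cog_feats:
# siglip, siglip_processor = _load_siglip(device)
```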
@@ -399,10 +405,10 @@ def get_cog_feats(images):
     seg_imgs = np.stack(seg_img_list, axis=0) # b,H,W,3
     seg_imgs = torch.from_numpy(seg_imgs).permute(0,3,1,2) # / 255.0
 
-    inputs = pe3r.siglip_processor(images=seg_imgs, return_tensors="pt")
+    inputs = siglip_processor(images=seg_imgs, return_tensors="pt")
     inputs = {key: value.to(device) for key, value in inputs.items()}
 
-    image_features = pe3r.siglip.get_image_features(**inputs)
+    image_features = siglip.get_image_features(**inputs)
     image_features = image_features / image_features.norm(dim=-1, keepdim=True)
     image_features = image_features.detach().cpu()
 
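A self-contained sketch of the image-feature path in this hunk, using dummy segment crops in the same layout (b,H,W,3 uint8, permuted to b,3,H,W before the processor call). The torch.no_grad() wrapper is an addition for memory hygiene; the committed code instead calls .detach() afterwards:

```python
# Sketch of the SigLIP image-feature computation from this hunk.
import numpy as np
import torch
from transformers import AutoModel, AutoProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"
siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256")

seg_imgs = np.zeros((2, 256, 256, 3), dtype=np.uint8)      # b,H,W,3 dummy crops
seg_imgs = torch.from_numpy(seg_imgs).permute(0, 3, 1, 2)  # b,3,H,W

inputs = siglip_processor(images=seg_imgs, return_tensors="pt")
inputs = {key: value.to(device) for key, value in inputs.items()}

with torch.no_grad():
    image_features = siglip.get_image_features(**inputs)   # (b, d)
image_features = image_features / image_features.norm(dim=-1, keepdim=True)
image_features = image_features.cpu()
```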
@@ -519,13 +525,14 @@ def get_3D_object_from_scene(outdir, text, threshold, scene, min_conf_thr, as_po
 
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     # pe3r.siglip_tokenizer.to(device)
-
+    siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+    siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
 
     texts = [text]
-    inputs = pe3r.siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
+    inputs = siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
     inputs = {key: value.to(device) for key, value in inputs.items()}
     with torch.no_grad():
-        text_feats = pe3r.siglip.get_text_features(**inputs)
+        text_feats = siglip.get_text_features(**inputs)
     text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)
     scene.render_image(text_feats, threshold)
     scene.ori_imgs = scene.rendered_imgs
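The text path mirrors the image path. The dot-product scoring sketched at the end is an assumption about how scene.render_image(text_feats, threshold) consumes the normalized features; its internals are outside this diff:

```python
# Sketch of the SigLIP text-feature computation from this hunk.
import torch
from transformers import AutoModel, AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256")

texts = ["a red chair"]  # example query
inputs = siglip_tokenizer(text=texts, padding="max_length", return_tensors="pt")
inputs = {key: value.to(device) for key, value in inputs.items()}

with torch.no_grad():
    text_feats = siglip.get_text_features(**inputs)
text_feats = text_feats / text_feats.norm(dim=-1, keepdim=True)

# With both sides unit-normalized, similarity is a dot product:
# sim = image_features @ text_feats.T   # (num_segments, num_texts)
# mask = sim > threshold                # assumed thresholding step
```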
modules/pe3r/models.py
CHANGED
@@ -47,6 +47,6 @@ class Models:
         self.yolov8 = ObjectAwareModel(YOLO8_CKP)
 
         # -- siglip --
-        self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
-        self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
-        self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+        # self.siglip = AutoModel.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+        # self.siglip_tokenizer = AutoTokenizer.from_pretrained("google/siglip-large-patch16-256", device_map=device)
+        # self.siglip_processor = AutoProcessor.from_pretrained("google/siglip-large-patch16-256", device_map=device)
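Taken together, the two files move SigLIP construction out of Models.__init__ (keeping the CPU-only Models('cpu') instance in app.py light at startup) and into the device-aware call sites. A lazy attribute is a hypothetical middle ground, not part of this commit:

```python
# Hypothetical lazy-loading variant: defer the weight load until first
# access while keeping pe3r.siglip-style call sites working.
from functools import cached_property

from transformers import AutoModel, AutoProcessor

class Models:
    def __init__(self, device):
        self.device = device  # other members (yolov8, sam2, ...) omitted

    @cached_property
    def siglip(self):
        return AutoModel.from_pretrained(
            "google/siglip-large-patch16-256", device_map=self.device
        )

    @cached_property
    def siglip_processor(self):
        return AutoProcessor.from_pretrained("google/siglip-large-patch16-256")
```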