Spaces:
Sleeping
Sleeping
cheng
committed on
Commit
·
863c45d
1
Parent(s):
baab402
update PIL image
Browse files
- clip_component.py +2 -2
- detector.py +3 -4
- grounding_component.py +2 -2
clip_component.py
CHANGED
@@ -17,8 +17,8 @@ def get_token_from_clip(image):
|
|
17 |
text_features = model.encode_text(text_tokens).float()
|
18 |
text_features /= text_features.norm(dim=-1, keepdim=True)
|
19 |
|
20 |
-
image_pil = Image.fromarray(
|
21 |
-
image_input = preprocess(
|
22 |
|
23 |
with torch.no_grad():
|
24 |
image_feature = model.encode_image(image_input)
|
|
|
17 |
text_features = model.encode_text(text_tokens).float()
|
18 |
text_features /= text_features.norm(dim=-1, keepdim=True)
|
19 |
|
20 |
+
image_pil = Image.fromarray(image.astype('uint8'))
|
21 |
+
image_input = preprocess(image_pil).unsqueeze(0).to(device) # Add batch dimension
|
22 |
|
23 |
with torch.no_grad():
|
24 |
image_feature = model.encode_image(image_input)
|
detector.py
CHANGED
@@ -2,8 +2,7 @@ from clip_component import get_token_from_clip
|
|
2 |
from grounding_component import run_grounding
|
3 |
|
4 |
def detect(image):
|
5 |
-
|
6 |
-
print('
|
7 |
-
|
8 |
-
predict_image = run_grounding(image,token)
|
9 |
return predict_image
|
|
|
2 |
from grounding_component import run_grounding
|
3 |
|
4 |
def detect(image):
|
5 |
+
describe = get_token_from_clip(image)
|
6 |
+
print('describe:',describe)
|
7 |
+
predict_image = run_grounding(image,describe)
|
|
|
8 |
return predict_image
|
grounding_component.py
CHANGED
@@ -57,10 +57,10 @@ def image_transform_grounding_for_vis(init_image):
|
|
57 |
|
58 |
model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
|
59 |
|
60 |
-
def run_grounding(input_image,
|
61 |
pil_img = Image.fromarray(input_image)
|
62 |
init_image = pil_img.convert("RGB")
|
63 |
-
grounding_caption =
|
64 |
box_threshold = 0.25
|
65 |
text_threshold = 0.25
|
66 |
|
|
|
57 |
|
58 |
model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
|
59 |
|
60 |
+
def run_grounding(input_image, describe):
|
61 |
pil_img = Image.fromarray(input_image)
|
62 |
init_image = pil_img.convert("RGB")
|
63 |
+
grounding_caption = describe
|
64 |
box_threshold = 0.25
|
65 |
text_threshold = 0.25
|
66 |
|