Spaces:
Sleeping
Sleeping
cheng
committed on
Commit
·
863c45d
1
Parent(s):
baab402
update PIL image
Browse files
- clip_component.py +2 -2
- detector.py +3 -4
- grounding_component.py +2 -2
clip_component.py
CHANGED
@@ -17,8 +17,8 @@ def get_token_from_clip(image):
|
|
17 |
text_features = model.encode_text(text_tokens).float()
|
18 |
text_features /= text_features.norm(dim=-1, keepdim=True)
|
19 |
|
20 |
-
image_pil = Image.fromarray(
|
21 |
-
image_input = preprocess(
|
22 |
|
23 |
with torch.no_grad():
|
24 |
image_feature = model.encode_image(image_input)
|
|
|
17 |
text_features = model.encode_text(text_tokens).float()
|
18 |
text_features /= text_features.norm(dim=-1, keepdim=True)
|
19 |
|
20 |
+
image_pil = Image.fromarray(image.astype('uint8'))
|
21 |
+
image_input = preprocess(image_pil).unsqueeze(0).to(device) # Add batch dimension
|
22 |
|
23 |
with torch.no_grad():
|
24 |
image_feature = model.encode_image(image_input)
|
detector.py
CHANGED
@@ -2,8 +2,7 @@ from clip_component import get_token_from_clip
|
|
2 |
from grounding_component import run_grounding
|
3 |
|
4 |
def detect(image):
|
5 |
-
|
6 |
-
print('
|
7 |
-
|
8 |
-
predict_image = run_grounding(image,token)
|
9 |
return predict_image
|
|
|
2 |
from grounding_component import run_grounding
|
3 |
|
4 |
def detect(image):
|
5 |
+
describe = get_token_from_clip(image)
|
6 |
+
print('describe:',describe)
|
7 |
+
predict_image = run_grounding(image,describe)
|
|
|
8 |
return predict_image
|
grounding_component.py
CHANGED
@@ -57,10 +57,10 @@ def image_transform_grounding_for_vis(init_image):
|
|
57 |
|
58 |
model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
|
59 |
|
60 |
-
def run_grounding(input_image,
|
61 |
pil_img = Image.fromarray(input_image)
|
62 |
init_image = pil_img.convert("RGB")
|
63 |
-
grounding_caption =
|
64 |
box_threshold = 0.25
|
65 |
text_threshold = 0.25
|
66 |
|
|
|
57 |
|
58 |
model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
|
59 |
|
60 |
+
def run_grounding(input_image, describe):
|
61 |
pil_img = Image.fromarray(input_image)
|
62 |
init_image = pil_img.convert("RGB")
|
63 |
+
grounding_caption = describe
|
64 |
box_threshold = 0.25
|
65 |
text_threshold = 0.25
|
66 |
|