jadechoghari committed
Commit 5017de6 · verified · 1 Parent(s): 0e6a2e9

Update app.py

Files changed (1)
  1. app.py +37 -40
app.py CHANGED
@@ -1,13 +1,13 @@
-# no cpu required
-#TODO: update to gpu usage
+# no gpu required
 from transformers import pipeline, SamModel, SamProcessor
 import torch
 import numpy as np
 import spaces
 
+device = "cuda" if torch.cuda.is_available() else "cpu"
 checkpoint = "google/owlv2-base-patch16-ensemble"
-detector = pipeline(model=checkpoint, task="zero-shot-object-detection")
-sam_model = SamModel.from_pretrained("jadechoghari/robustsam-vit-base")
+detector = pipeline(model=checkpoint, task="zero-shot-object-detection", device=device)
+sam_model = SamModel.from_pretrained("jadechoghari/robustsam-vit-base").to(device)
 sam_processor = SamProcessor.from_pretrained("jadechoghari/robustsam-vit-base")
 
 
@@ -23,57 +23,54 @@ def query(image, texts, threshold):
     result_labels = []
     for pred in predictions:
 
-        box = pred["box"]
+
         score = pred["score"]
-        label = pred["label"]
-        box = [round(pred["box"]["xmin"], 2), round(pred["box"]["ymin"], 2),
-               round(pred["box"]["xmax"], 2), round(pred["box"]["ymax"], 2)]
 
-        inputs = sam_processor(
-            image,
-            input_boxes=[[[box]]],
-            return_tensors="pt"
-        )
+        if score > 0.5:
+            box = pred["box"]
+            label = pred["label"]
+            box = [round(pred["box"]["xmin"], 2), round(pred["box"]["ymin"], 2),
+                   round(pred["box"]["xmax"], 2), round(pred["box"]["ymax"], 2)]
 
-        with torch.no_grad():
-            outputs = sam_model(**inputs)
+            inputs = sam_processor(
+                image,
+                input_boxes=[[[box]]],
+                return_tensors="pt"
+            ).to(device)
 
-        mask = sam_processor.image_processor.post_process_masks(
-            outputs.pred_masks.cpu(),
-            inputs["original_sizes"].cpu(),
-            inputs["reshaped_input_sizes"].cpu()
-        )[0][0][0].numpy()
-        mask = mask[np.newaxis, ...]
+            with torch.no_grad():
+                outputs = sam_model(**inputs)
 
-        from PIL import Image, ImageDraw
-        # Convert mask to image format and overlay on the original image
-        mask_image = Image.fromarray((mask[0] * 255).astype(np.uint8))
-        mask_image = mask_image.convert("L") # Convert to grayscale for transparency
-        mask_image = mask_image.resize(image.size)
-
-        # Create an alpha mask for transparency
-        alpha_mask = Image.new("L", mask_image.size, 128) # Adjust transparency level here
-        image.paste(mask_image, (0, 0), alpha_mask) # Overlay the mask on the image
-
-        # Save the annotated image
-        image.save("annotated_image.png")
-        print("saved image")
-        result_labels.append((mask, label))
+            mask = sam_processor.image_processor.post_process_masks(
+                outputs.pred_masks.cpu(),
+                inputs["original_sizes"].cpu(),
+                inputs["reshaped_input_sizes"].cpu()
+            )[0][0][0].numpy()
+            mask = mask[np.newaxis, ...]
+            result_labels.append((mask, label))
+
     return image, result_labels
 
 import gradio as gr
 
-description = "This Space combines OWLv2, the state-of-the-art zero-shot object detection model with SAM, the state-of-the-art mask generation model. SAM normally doesn't accept text input. Combining SAM with OWLv2 makes SAM text promptable. Try the example or input an image and comma separated candidate labels to segment."
+description = (
+    "Welcome to RobustSAM by Snap Research."
+    "This Space uses RobustSAM, an enhanced version of the Segment Anything Model (SAM) with improved performance on low-quality images while maintaining zero-shot segmentation capabilities. "
+    "Thanks to its integration with OWLv2, RobustSAM becomes text-promptable, allowing for flexible and accurate segmentation, even with degraded image quality. Try the example or input an image with comma-separated candidate labels to see the enhanced segmentation results."
+)
+
 demo = gr.Interface(
     query,
     inputs=[gr.Image(type="pil", label="Image Input"), gr.Textbox(label = "Candidate Labels"), gr.Slider(0, 1, value=0.05, label="Confidence Threshold")],
-    # outputs="annotatedimage", #comment this out - it looks weird
     outputs=gr.AnnotatedImage(label="Segmented Image"),
-    title="OWL 🤝 SAM",
+    title="RobustSAM",
     description=description,
     examples=[
-        ["./cats.png", "cat", 0.1],
+        ["./blur.jpg", "insect", 0.1],
+        ["./lowlight.jpg", "bus, window", 0.1],
+        ["./rain.jpg", "tree, leafs", 0.1],
+        ["./haze.jpg", "", 0.1],
    ],
     cache_examples=True
 )
-demo.launch(debug=True)
+demo.launch()
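
For reference, a minimal sketch of how the updated pieces presumably fit together end to end. Only lines 23 onward of query() are visible in the hunk, so the @spaces.GPU decorator, the label parsing, and the detector(...) call are assumptions; the model loading, the SamProcessor call, and the mask post-processing mirror the diff above.

# Sketch of the OWLv2 -> RobustSAM flow after this commit.
# Assumed (not shown in the hunk): @spaces.GPU, label parsing, detector call.
import numpy as np
import spaces
import torch
from transformers import SamModel, SamProcessor, pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"
detector = pipeline(model="google/owlv2-base-patch16-ensemble",
                    task="zero-shot-object-detection", device=device)
sam_model = SamModel.from_pretrained("jadechoghari/robustsam-vit-base").to(device)
sam_processor = SamProcessor.from_pretrained("jadechoghari/robustsam-vit-base")

@spaces.GPU  # assumed: requests GPU time on a ZeroGPU Space
def query(image, texts, threshold):
    # Assumed: split comma-separated labels and run zero-shot detection with OWLv2
    candidate_labels = [t.strip() for t in texts.split(",")]
    predictions = detector(image, candidate_labels=candidate_labels, threshold=threshold)

    result_labels = []
    for pred in predictions:
        if pred["score"] > 0.5:  # same extra cutoff as in the diff
            box = [round(pred["box"]["xmin"], 2), round(pred["box"]["ymin"], 2),
                   round(pred["box"]["xmax"], 2), round(pred["box"]["ymax"], 2)]
            # Prompt RobustSAM with the detected box (as in the diff)
            inputs = sam_processor(image, input_boxes=[[[box]]],
                                   return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = sam_model(**inputs)
            mask = sam_processor.image_processor.post_process_masks(
                outputs.pred_masks.cpu(),
                inputs["original_sizes"].cpu(),
                inputs["reshaped_input_sizes"].cpu()
            )[0][0][0].numpy()
            result_labels.append((mask[np.newaxis, ...], pred["label"]))
    return image, result_labels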