finegrain-object-cutter_v1

Sleeping

App Files Files Community

AIRider committed on Nov 20, 2024

Commit

d5de94c

verified ·

1 Parent(s): 9e01353

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +60 -187

src/app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import tempfile
 import time
 from collections.abc import Sequence
-from typing import Any, cast
 import gradio as gr
 import numpy as np
@@ -14,7 +14,6 @@ from PIL import Image
 from pymatting.foreground.estimate_foreground_ml import estimate_foreground_ml
 from refiners.fluxion.utils import no_grad
 from refiners.solutions import BoxSegmenter
-from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
 BoundingBox = tuple[int, int, int, int]
@@ -23,18 +22,11 @@ pillow_heif.register_avif_opener()
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# weird dance because ZeroGPU
 segmenter = BoxSegmenter(device="cpu")
 segmenter.device = device
 segmenter.model = segmenter.model.to(device=segmenter.device)
-gd_model_path = "IDEA-Research/grounding-dino-base"
-gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
-gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_dtype=torch.float32)
-gd_model = gd_model.to(device=device)  # type: ignore
-assert isinstance(gd_model, GroundingDinoForObjectDetection)
 def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
     if not bboxes:
         return None
@@ -48,32 +40,6 @@ def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
         max(bbox[3] for bbox in bboxes),
     )
-def corners_to_pixels_format(bboxes: torch.Tensor, width: int, height: int) -> torch.Tensor:
-    x1, y1, x2, y2 = bboxes.round().to(torch.int32).unbind(-1)
-    return torch.stack((x1.clamp_(0, width), y1.clamp_(0, height), x2.clamp_(0, width), y2.clamp_(0, height)), dim=-1)
-def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
-    assert isinstance(gd_processor, GroundingDinoProcessor)
-    # Grounding Dino expects a dot after each category.
-    inputs = gd_processor(images=img, text=f"{prompt}.", return_tensors="pt").to(device=device)
-    with no_grad():
-        outputs = gd_model(**inputs)
-    width, height = img.size
-    results: dict[str, Any] = gd_processor.post_process_grounded_object_detection(
-        outputs,
-        inputs["input_ids"],
-        target_sizes=[(height, width)],
-    )[0]
-    assert "boxes" in results and isinstance(results["boxes"], torch.Tensor)
-    bboxes = corners_to_pixels_format(results["boxes"].cpu(), width, height)
-    return bbox_union(bboxes.numpy().tolist())
 def apply_mask(
     img: Image.Image,
     mask_img: Image.Image,
@@ -86,54 +52,39 @@ def apply_mask(
     if defringe:
         # Mitigate edge halo effects via color decontamination
         rgb, alpha = np.asarray(img) / 255.0, np.asarray(mask_img) / 255.0
-        foreground = cast(np.ndarray[Any, np.dtype[np.uint8]], estimate_foreground_ml(rgb, alpha))
         img = Image.fromarray((foreground * 255).astype("uint8"))
     result = Image.new("RGBA", img.size)
     result.paste(img, (0, 0), mask_img)
     return result
 @spaces.GPU
 def _gpu_process(
     img: Image.Image,
-    prompt: str | BoundingBox | None,
 ) -> tuple[Image.Image, BoundingBox | None, list[str]]:
-    # Because of ZeroGPU shenanigans, we need a *single* function with the
-    # `spaces.GPU` decorator that *does not* contain postprocessing.
     time_log: list[str] = []
-    if isinstance(prompt, str):
-        t0 = time.time()
-        bbox = gd_detect(img, prompt)
-        time_log.append(f"detect: {time.time() - t0}")
-        if not bbox:
-            print(time_log[0])
-            raise gr.Error("No object detected")
-    else:
-        bbox = prompt
     t0 = time.time()
     mask = segmenter(img, bbox)
     time_log.append(f"segment: {time.time() - t0}")
     return mask, bbox, time_log
 def _process(
     img: Image.Image,
-    prompt: str | BoundingBox | None,
 ) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
     # enforce max dimensions for pymatting performance reasons
     if img.width > 2048 or img.height > 2048:
         orig_res = max(img.width, img.height)
         img.thumbnail((2048, 2048))
-        if isinstance(prompt, tuple):
-            x0, y0, x1, y1 = (int(x * 2048 / orig_res) for x in prompt)
-            prompt = (x0, y0, x1, y1)
-    mask, bbox, time_log = _gpu_process(img, prompt)
     t0 = time.time()
     masked_alpha = apply_mask(img, mask, defringe=True)
@@ -152,7 +103,6 @@ def _process(
     return (img, masked_rgb), gr.DownloadButton(value=temp.name, interactive=True)
 def process_bbox(prompts: dict[str, Any]) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
     assert isinstance(img := prompts["image"], Image.Image)
     assert isinstance(boxes := prompts["boxes"], list)
@@ -164,38 +114,17 @@ def process_bbox(prompts: dict[str, Any]) -> tuple[tuple[Image.Image, Image.Imag
         bbox = None
     return _process(img, bbox)
 def on_change_bbox(prompts: dict[str, Any] | None):
     return gr.update(interactive=prompts is not None)
-def process_prompt(img: Image.Image, prompt: str) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
-    return _process(img, prompt)
-def on_change_prompt(img: Image.Image | None, prompt: str | None):
-    return gr.update(interactive=bool(img and prompt))
 TITLE = """
 <center>
-  <div style="
-    background-color: #ff9100;
-    color: #1f2937;
-    padding: 0.5rem 1rem;
-    font-size: 1.25rem;
-  ">
-    🚀 For an optimized version of this space, try out the
-    <a href="https://finegrain.ai/editor?utm_source=hf&utm_campaign=object-cutter" target="_blank">Finegrain Editor</a>! You'll find there all our AI tools made available in a nice UI. 🚀
-  </div>
   <h1 style="font-size: 1.5rem; margin-bottom: 0.5rem;">
-    Object Cutter Powered By Refiners
   </h1>
   <p>
-    Create high-quality HD cutouts for any object in your image with just a text prompt — no manual work required!
     <br>
     The object will be available on a transparent background, ready to paste elsewhere.
   </p>
@@ -211,118 +140,62 @@ TITLE = """
         href="https://huggingface.co/datasets/Nfiniteai/product-masks-sample"
         target="_blank"
     >synthetic data provided by Nfinite</a>.
-    <br>
-    It is powered by Refiners, our open source micro-framework for simple foundation model adaptation.
-    If you enjoyed it, please consider starring Refiners on GitHub!
   </p>
-  <a href="https://github.com/finegrain-ai/refiners" target="_blank">
-    <img src="https://img.shields.io/github/stars/finegrain-ai/refiners?style=social" />
-  </a>
 </center>
 """
 with gr.Blocks() as demo:
     gr.HTML(TITLE)
-    with gr.Tab("By prompt", id="tab_prompt"):
-        with gr.Row():
-            with gr.Column():
-                iimg = gr.Image(type="pil", label="Input")
-                prompt = gr.Textbox(label="What should we cut?")
-                btn = gr.ClearButton(value="Cut Out Object", interactive=False)
-            with gr.Column():
-                oimg = ImageSlider(label="Before / After", show_download_button=False, interactive=False)
-                dlbt = gr.DownloadButton("Download Cutout", interactive=False)
-        btn.add(oimg)
-        for inp in [iimg, prompt]:
-            inp.change(
-                fn=on_change_prompt,
-                inputs=[iimg, prompt],
-                outputs=[btn],
             )
-        btn.click(
-            fn=process_prompt,
-            inputs=[iimg, prompt],
-            outputs=[oimg, dlbt],
-        )
-        examples = [
-            [
-                "examples/potted-plant.jpg",
-                "potted plant",
-            ],
-            [
-                "examples/chair.jpg",
-                "chair",
-            ],
-            [
-                "examples/black-lamp.jpg",
-                "black lamp",
-            ],
-        ]
-        ex = gr.Examples(
-            examples=examples,
-            inputs=[iimg, prompt],
-            outputs=[oimg, dlbt],
-            fn=process_prompt,
-            cache_examples=True,
-        )
-    with gr.Tab("By bounding box", id="tab_bb"):
-        with gr.Row():
-            with gr.Column():
-                annotator = image_annotator(
-                    image_type="pil",
-                    disable_edit_boxes=True,
-                    show_download_button=False,
-                    show_share_button=False,
-                    single_box=True,
-                    label="Input",
-                )
-                btn = gr.ClearButton(value="Cut Out Object", interactive=False)
-            with gr.Column():
-                oimg = ImageSlider(label="Before / After", show_download_button=False)
-                dlbt = gr.DownloadButton("Download Cutout", interactive=False)
-        btn.add(oimg)
-        annotator.change(
-            fn=on_change_bbox,
-            inputs=[annotator],
-            outputs=[btn],
-        )
-        btn.click(
-            fn=process_bbox,
-            inputs=[annotator],
-            outputs=[oimg, dlbt],
-        )
-        examples = [
-            {
-                "image": "examples/potted-plant.jpg",
-                "boxes": [{"xmin": 51, "ymin": 511, "xmax": 639, "ymax": 1255}],
-            },
-            {
-                "image": "examples/chair.jpg",
-                "boxes": [{"xmin": 98, "ymin": 330, "xmax": 973, "ymax": 1468}],
-            },
-            {
-                "image": "examples/black-lamp.jpg",
-                "boxes": [{"xmin": 88, "ymin": 148, "xmax": 700, "ymax": 1414}],
-            },
-        ]
-        ex = gr.Examples(
-            examples=examples,
-            inputs=[annotator],
-            outputs=[oimg, dlbt],
-            fn=process_bbox,
-            cache_examples=True,
-        )
-demo.launch(share=False)

 import tempfile
 import time
+from typing import Any
 from collections.abc import Sequence
 import gradio as gr
 import numpy as np
 from pymatting.foreground.estimate_foreground_ml import estimate_foreground_ml
 from refiners.fluxion.utils import no_grad
 from refiners.solutions import BoxSegmenter
 BoundingBox = tuple[int, int, int, int]
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Initialize segmenter
 segmenter = BoxSegmenter(device="cpu")
 segmenter.device = device
 segmenter.model = segmenter.model.to(device=segmenter.device)
 def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
     if not bboxes:
         return None
         max(bbox[3] for bbox in bboxes),
     )
 def apply_mask(
     img: Image.Image,
     mask_img: Image.Image,
     if defringe:
         # Mitigate edge halo effects via color decontamination
         rgb, alpha = np.asarray(img) / 255.0, np.asarray(mask_img) / 255.0
+        foreground = estimate_foreground_ml(rgb, alpha)
         img = Image.fromarray((foreground * 255).astype("uint8"))
     result = Image.new("RGBA", img.size)
     result.paste(img, (0, 0), mask_img)
     return result
 @spaces.GPU
 def _gpu_process(
     img: Image.Image,
+    bbox: BoundingBox | None,
 ) -> tuple[Image.Image, BoundingBox | None, list[str]]:
     time_log: list[str] = []
     t0 = time.time()
     mask = segmenter(img, bbox)
     time_log.append(f"segment: {time.time() - t0}")
     return mask, bbox, time_log
 def _process(
     img: Image.Image,
+    bbox: BoundingBox | None,
 ) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
     # enforce max dimensions for pymatting performance reasons
     if img.width > 2048 or img.height > 2048:
         orig_res = max(img.width, img.height)
         img.thumbnail((2048, 2048))
+        if isinstance(bbox, tuple):
+            x0, y0, x1, y1 = (int(x * 2048 / orig_res) for x in bbox)
+            bbox = (x0, y0, x1, y1)
+    mask, bbox, time_log = _gpu_process(img, bbox)
     t0 = time.time()
     masked_alpha = apply_mask(img, mask, defringe=True)
     return (img, masked_rgb), gr.DownloadButton(value=temp.name, interactive=True)
 def process_bbox(prompts: dict[str, Any]) -> tuple[tuple[Image.Image, Image.Image], gr.DownloadButton]:
     assert isinstance(img := prompts["image"], Image.Image)
     assert isinstance(boxes := prompts["boxes"], list)
         bbox = None
     return _process(img, bbox)
 def on_change_bbox(prompts: dict[str, Any] | None):
     return gr.update(interactive=prompts is not None)
 TITLE = """
 <center>
   <h1 style="font-size: 1.5rem; margin-bottom: 0.5rem;">
+    Object Cutter With Bounding Box
   </h1>
   <p>
+    Create high-quality HD cutouts for any object in your image using bounding box selection.
     <br>
     The object will be available on a transparent background, ready to paste elsewhere.
   </p>
         href="https://huggingface.co/datasets/Nfiniteai/product-masks-sample"
         target="_blank"
     >synthetic data provided by Nfinite</a>.
   </p>
 </center>
 """
 with gr.Blocks() as demo:
     gr.HTML(TITLE)
+    with gr.Row():
+        with gr.Column():
+            annotator = image_annotator(
+                image_type="pil",
+                disable_edit_boxes=True,
+                show_download_button=False,
+                show_share_button=False,
+                single_box=True,
+                label="Input",
             )
+            btn = gr.ClearButton(value="Cut Out Object", interactive=False)
+        with gr.Column():
+            oimg = ImageSlider(label="Before / After", show_download_button=False)
+            dlbt = gr.DownloadButton("Download Cutout", interactive=False)
+    btn.add(oimg)
+    annotator.change(
+        fn=on_change_bbox,
+        inputs=[annotator],
+        outputs=[btn],
+    )
+    btn.click(
+        fn=process_bbox,
+        inputs=[annotator],
+        outputs=[oimg, dlbt],
+    )
+    examples = [
+        {
+            "image": "examples/potted-plant.jpg",
+            "boxes": [{"xmin": 51, "ymin": 511, "xmax": 639, "ymax": 1255}],
+        },
+        {
+            "image": "examples/chair.jpg",
+            "boxes": [{"xmin": 98, "ymin": 330, "xmax": 973, "ymax": 1468}],
+        },
+        {
+            "image": "examples/black-lamp.jpg",
+            "boxes": [{"xmin": 88, "ymin": 148, "xmax": 700, "ymax": 1414}],
+        },
+    ]
+    ex = gr.Examples(
+        examples=examples,
+        inputs=[annotator],
+        outputs=[oimg, dlbt],
+        fn=process_bbox,
+        cache_examples=True,
+    )
+demo.launch(share=False)