Beom0's picture
Update app.py
3567b7f verified
"""
Copyright (c) 2024-present Naver Cloud Corp.
This source code is based on code from the Segment Anything Model (SAM)
(https://github.com/facebookresearch/segment-anything).
This source code is licensed under the license found in the
LICENSE file in the root directory of this source tree.
"""
import os, sys
sys.path.append(os.getcwd())
# Gradio demo, comparison SAM vs ZIM
import os
import torch
import gradio as gr
from gradio_image_prompter import ImagePrompter
import numpy as np
import cv2
from zim import zim_model_registry, ZimPredictor, ZimAutomaticMaskGenerator
from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator
from zim.utils import show_mat_anns
from huggingface_hub import hf_hub_download
def get_shortest_axis(image):
    """Return the length of the shorter of the first two axes of ``image``.

    Args:
        image: Array-like object exposing ``shape``. The first two entries
            are treated as height and width; any trailing axes (such as a
            channel axis) are ignored, so 2-D and 3-D arrays both work.

    Returns:
        The smaller of the height and the width.
    """
    height, width = image.shape[:2]
    return min(height, width)
def reset_image(image, prompts):
    """Prime both predictors with a newly uploaded image and clear the views.

    Args:
        image: Value of the point/box prompter: either ``None`` or a mapping
            whose ``'image'`` entry holds the picture array.
        prompts: Current prompt state. It is not read; a new empty dict is
            returned in its place.

    Returns:
        A 9-tuple: the picture four times, a blank single-channel mask four
        times, and an empty prompt dict.
    """
    if image is not None:
        frame = image['image']
    else:
        # Nothing uploaded: use a blank 1024x1024 RGB canvas instead.
        frame = np.zeros((1024, 1024, 3), dtype=np.uint8)

    zim_predictor.set_image(frame)
    sam_predictor.set_image(frame)

    blank = np.zeros(frame.shape[:2], dtype=np.uint8)
    return (frame,) * 4 + (blank,) * 4 + (dict(),)
def reset_example_image(image, prompts):
    """Prime both predictors with a selected example image and clear the views.

    Args:
        image: The example picture array, or ``None``.
        prompts: Current prompt state. It is not read; a new empty dict is
            returned in its place.

    Returns:
        A 10-tuple: the picture, a ``{'image', 'prompts'}`` dict for the
        point/box prompter, the picture three more times, a blank
        single-channel mask four times, and the empty prompt dict.
    """
    # Nothing selected: use a blank 1024x1024 RGB canvas instead.
    frame = np.zeros((1024, 1024, 3), dtype=np.uint8) if image is None else image

    zim_predictor.set_image(frame)
    sam_predictor.set_image(frame)

    cleared = dict()
    blank = np.zeros(frame.shape[:2], dtype=np.uint8)
    # The same empty dict object is shared by the prompter value and the state.
    prompter_value = {'image': frame, 'prompts': cleared}

    return (frame, prompter_value) + (frame,) * 3 + (blank,) * 4 + (cleared,)
def run_amg(image):
    """Run automatic mask generation with both ZIM and SAM.

    Args:
        image: The working image array, or ``None`` when nothing has been
            loaded yet.

    Returns:
        ``(zim_masks_vis, sam_masks_vis)``: the ZIM and SAM generator results,
        each rendered over ``image`` with ``show_mat_anns``.

    Raises:
        gr.Error: If no image is available.
    """
    # Without this guard a click before any upload passes None straight into
    # the mask generators and fails with an opaque error.
    if image is None:
        raise gr.Error('Please upload an image first.')

    gr.Info('Checkout ZIM Auto Mask tab.', duration=3)

    zim_masks = zim_mask_generator.generate(image)
    zim_masks_vis = show_mat_anns(image, zim_masks)

    sam_masks = sam_mask_generator.generate(image)
    sam_masks_vis = show_mat_anns(image, sam_masks)

    return zim_masks_vis, sam_masks_vis
def _predict_uint8_mask(predictor, point_coords, point_labels, boxes):
    """Run ``predictor.predict`` once and return its single mask as uint8."""
    mask, _, _ = predictor.predict(
        point_coords=point_coords,
        point_labels=point_labels,
        box=boxes,
        multimask_output=False,
    )
    # Drop the leading axis of the single-mask output, then scale by 255.
    return np.uint8(np.squeeze(mask, axis=0) * 255)


def run_model(image, prompts):
    """Predict one mask with ZIM and one with SAM from the same prompts.

    This function never calls ``set_image``; it relies on both predictors
    having been primed beforehand (``reset_image`` / ``reset_example_image``
    do that).

    Args:
        image: Not read here; kept so existing callers keep working.
        prompts: Dict with any of these optional keys:
            ``"point"``: iterable of ``(label, coords)`` pairs;
            ``"bbox"``: box coordinates, converted with ``np.array``;
            ``"scribble"``: iterable of coordinates, each reversed with
            ``np.flip`` and used as a point with label 1. When present it
            replaces any ``"point"`` prompts.

    Returns:
        ``(zim_mask, sam_mask)``: each predictor's output squeezed on axis 0,
        multiplied by 255 and cast to uint8.

    Raises:
        gr.Error: If ``prompts`` is empty, if the scribble holds no
            coordinates, or if no usable point or box remains.
    """
    if not prompts:
        raise gr.Error('Please input any point or BBox')

    gr.Info('Checkout ZIM Mask tab.', duration=3)

    point_coords = None
    point_labels = None
    boxes = None

    # An empty list is treated like a missing key so the predictors never
    # receive zero-length arrays.
    points = prompts.get("point")
    if points is not None and len(points) > 0:
        point_coords = np.array([coords for _, coords in points])
        point_labels = np.array([label for label, _ in points])

    bbox = prompts.get("bbox")
    if bbox is not None and len(bbox) > 0:
        boxes = np.array(bbox)

    if "scribble" in prompts:
        flipped = [np.flip(pts) for pts in prompts["scribble"]]
        if len(flipped) == 0:
            raise gr.Error("Please input any scribbles.")
        point_coords = np.array(flipped)
        point_labels = np.array([1] * len(flipped))

    if point_coords is None and boxes is None:
        raise gr.Error('Please input any point or BBox')

    zim_mask = _predict_uint8_mask(zim_predictor, point_coords, point_labels, boxes)
    sam_mask = _predict_uint8_mask(sam_predictor, point_coords, point_labels, boxes)

    return zim_mask, sam_mask
def reset_scribble(image, scribble, prompts):
    """Clear the scribble editor value and blank both mask outputs.

    Args:
        image: The working image array, or ``None``. Only its first two
            shape entries are used, to size the blank masks.
        scribble: Editor value dict, or ``None``. Every entry is set to
            ``None`` in place.
        prompts: Prompt dict. Every entry is replaced by an empty list in
            place.

    Returns:
        ``(scribble, black, black)`` where ``black`` is a zero-filled
        single-channel uint8 mask.
    """
    for key in prompts:
        prompts[key] = []

    if scribble is not None:
        for key in scribble:
            scribble[key] = None

    # Masks are single-channel, so only height and width are taken; with no
    # image, fall back to the 1024x1024 placeholder size reset_image uses.
    mask_shape = (1024, 1024) if image is None else image.shape[:2]
    black = np.zeros(mask_shape, dtype=np.uint8)

    return scribble, black, black
def update_scribble(image, scribble, prompts):
    """Convert the drawn scribble into point prompts and run both models.

    Args:
        image: Forwarded unchanged to ``run_model``.
        scribble: Editor value. ``scribble["layers"][0]`` is read as an array
            whose last channel is greater than zero wherever the user drew.
        prompts: Incoming prompt dict. Its ``"point"`` and ``"bbox"`` entries
            are removed in place; the dict returned below is a fresh one.

    Returns:
        ``(zim_mask, sam_mask, prompts)`` where ``prompts`` is a new dict
        holding only the sampled ``"scribble"`` coordinates.

    Raises:
        gr.Error: If there is no scribble layer, or (via ``run_model``) if
            the layer contains no drawn pixels.
    """
    max_points = 24  # upper bound on the number of sampled scribble points

    prompts.pop("point", None)
    prompts.pop("bbox", None)

    # Without a drawn layer, indexing layers[0] would fail with an opaque
    # IndexError/TypeError; report it as a missing scribble instead.
    layers = scribble.get("layers") if scribble else None
    if layers is None or len(layers) == 0:
        raise gr.Error("Please input any scribbles.")

    drawn = layers[0][..., -1] > 0
    drawn_coords = np.argwhere(drawn)

    # Pick at most max_points coordinates, evenly spaced over the drawn pixels.
    n_points = min(len(drawn_coords), max_points)
    indices = np.linspace(0, len(drawn_coords) - 1, n_points, dtype=int)

    prompts = {"scribble": drawn_coords[indices]}

    zim_mask, sam_mask = run_model(image, prompts)

    return zim_mask, sam_mask, prompts
def draw_point(img, pt, size, color):
    """Draw a filled ``color`` dot with a white rim on ``img`` at ``pt``.

    Args:
        img: Image passed to ``cv2.circle`` as the drawing target.
        pt: Indexable pair; ``pt[0]`` and ``pt[1]`` are truncated to ints and
            used as the circle centre.
        size: Base radius; the white disc uses 1.3x and the coloured disc 0.9x.
        color: Colour tuple for the inner disc.
    """
    center = (int(pt[0]), int(pt[1]))
    rim_radius = int(size * 1.3)
    dot_radius = int(size * 0.9)
    white = (255, 255, 255)

    # Larger white disc first, so the coloured disc on top leaves a white rim.
    cv2.circle(img, center, rim_radius, white, -1)
    cv2.circle(img, center, dot_radius, color, -1)
def draw_images(image, mask, prompts):
    """Tint the masked region of ``image`` purple for display.

    The event handlers that call this function declare two outputs: the
    hidden working-image component and a "masked image" view. The working
    image must not change here, so a no-op ``gr.update()`` is always returned
    for it.

    Args:
        image: H x W x 3 picture array, or ``None``.
        mask: H x W array with values on a 0..255 scale, used as per-pixel
            opacity of the tint, or ``None``.
        prompts: Current prompt dict; when empty there is nothing to show.

    Returns:
        ``(update, overlay)``: a no-op update for the working image and the
        uint8 blended picture. When there is nothing to blend, ``overlay`` is
        ``image`` itself.
    """
    keep_working_image = gr.update()

    # mask.shape[1] == 1 is presumably a degenerate/placeholder mask — TODO confirm.
    if image is None or mask is None or not prompts or mask.shape[1] == 1:
        return keep_working_image, image

    base = np.float32(image)
    opacity = (np.float32(mask) / 255)[:, :, None]

    # 50/50 mix of the picture with a flat purple layer.
    purple = np.zeros_like(base, dtype=np.float32)
    purple[:, :, :] = [108, 0, 192]
    tinted = (base * 0.5) + (purple * 0.5)

    # Show the tinted picture where the mask is opaque, the original elsewhere.
    img_with_mask = np.uint8(opacity * tinted + (1 - opacity) * base)

    return keep_working_image, img_with_mask
def get_point_or_box_prompts(img, prompts):
    """Parse the prompter's points/box into ``prompts`` and run both models.

    Each raw prompt is a sequence of at least six numbers. Entries 2 and 5
    are type codes: ``(2, 3)`` marks a box whose corners are entries
    ``[0, 1]`` and ``[3, 4]``; ``(1, 4)`` marks a positive point and
    ``(0, 4)`` a negative point, both located at entries ``[0, 1]``.
    Prompts with any other code pair are ignored.

    Args:
        img: Prompter value: a mapping with ``'image'`` and ``'points'``
            entries, or ``None``.
        prompts: Prompt dict, updated in place: ``"scribble"`` is removed, and
            ``"point"`` / ``"bbox"`` are set, or removed when none was given.

    Returns:
        ``(image, zim_mask, sam_mask, prompts)``.

    Raises:
        gr.Error: If no image is available, if more than one box was drawn,
            or (via ``run_model``) if no prompt was given.
    """
    if img is None or img.get('image') is None:
        raise gr.Error('Please upload an image first.')

    image = img['image']
    img_prompts = img.get('points') or []

    point_prompts = []
    box_prompts = []
    for raw_prompt in img_prompts:
        values = [int(v) for v in raw_prompt]
        start_code, end_code = values[2], values[5]

        if start_code == 2 and end_code == 3:  # box prompt
            if box_prompts:
                raise gr.Error("Please input only one BBox.", duration=3)
            box_prompts.append([values[0], values[1], values[3], values[4]])
        elif start_code == 1 and end_code == 4:  # positive point prompt
            point_prompts.append((1, (values[0], values[1])))
        elif start_code == 0 and end_code == 4:  # negative point prompt
            point_prompts.append((0, (values[0], values[1])))

    # Point/box prompting replaces any earlier scribble prompt.
    prompts.pop("scribble", None)

    if point_prompts:
        prompts['point'] = point_prompts
    else:
        prompts.pop('point', None)

    if box_prompts:
        prompts['bbox'] = box_prompts
    else:
        prompts.pop('bbox', None)

    zim_mask, sam_mask = run_model(image, prompts)

    return image, zim_mask, sam_mask, prompts
def get_examples():
    """Return the paths of all entries in the ``examples`` directory.

    The directory is looked up next to this file. ``os.listdir`` yields
    entries in arbitrary order, so the names are sorted to keep the example
    gallery order stable across runs and platforms.

    Returns:
        A list of paths, one per directory entry, sorted by entry name.
    """
    assets_dir = os.path.join(os.path.dirname(__file__), 'examples')
    return [os.path.join(assets_dir, name) for name in sorted(os.listdir(assets_dir))]
def download_onnx_weights(repo_id="naver-iv/zim-anything-vitb", file_dir="zim_vit_b_2043"):
    """Fetch the encoder and decoder ONNX files and return their directory.

    Args:
        repo_id: Hub repository to download from.
        file_dir: Sub-directory of the repository that holds both files.

    Returns:
        The directory part of the path returned for the decoder file.
    """
    encoder_name = f"{file_dir}/encoder.onnx"
    decoder_name = f"{file_dir}/decoder.onnx"

    # The encoder's path is not needed; only the decoder's is used below.
    hf_hub_download(repo_id=repo_id, filename=encoder_name)
    decoder_path = hf_hub_download(repo_id=repo_id, filename=decoder_name)

    weights_dir = os.path.dirname(decoder_path)
    return weights_dir
# Script entry point: build the ZIM and SAM models, lay out the Gradio UI,
# wire its events to the handlers defined above, and launch the demo.
if __name__ == "__main__":
    # The same registry key selects the backbone for both models.
    backbone = "vit_b"

    # load ZIM
    # download_onnx_weights() returns the directory holding the downloaded
    # files; that directory is passed as the checkpoint.
    zim = zim_model_registry[backbone](checkpoint=download_onnx_weights())
    if torch.cuda.is_available():
        zim.cuda()
    # These names are module-level globals read by the handlers above.
    zim_predictor = ZimPredictor(zim)
    zim_mask_generator = ZimAutomaticMaskGenerator(
        zim,
        pred_iou_thresh=0.7,
        points_per_batch=8,
        stability_score_thresh=0.9,
    )

    # load SAM
    # Relative path — presumably resolved against the current working
    # directory; the file must already exist there (it is not downloaded here).
    ckpt_sam = "ckpts/sam_vit_b_01ec64.pth"
    sam = sam_model_registry[backbone](checkpoint=ckpt_sam)
    if torch.cuda.is_available():
        sam.cuda()
    sam_predictor = SamPredictor(sam)
    sam_mask_generator = SamAutomaticMaskGenerator(
        sam,
        points_per_batch=8,
    )

    with gr.Blocks() as demo:
        gr.Markdown("# <center> [Demo] ZIM: Zero-Shot Image Matting for Anything")

        # Per-session prompt dict shared by all handlers.
        prompts = gr.State(dict())
        # Hidden holder of the current working image.
        img = gr.Image(visible=False)
        # Hidden target of gr.Examples; its change event loads the example.
        example_image = gr.Image(visible=False)

        with gr.Row():
            # Left column: prompt inputs.
            with gr.Column():
                # Point and Bbox prompt
                with gr.Tab(label="Point or Box"):
                    img_with_point_or_box = ImagePrompter(
                        label="query image",
                        sources="upload"
                    )
                    interactions = "Left Click (Pos) | Middle/Right Click (Neg) | Press Move (Box)"
                    gr.Markdown("<h3 style='text-align: center'> {} </h3>".format(interactions))
                    run_bttn = gr.Button("Run")
                    amg_bttn = gr.Button("Automatic Mask Generation")

                # Scribble prompt
                with gr.Tab(label="Scribble"):
                    img_with_scribble = gr.ImageEditor(
                        label="Scribble",
                        brush=gr.Brush(colors=["#00FF00"], default_size=15),
                        sources="upload",
                        transforms=None,
                        layers=False
                    )
                    interactions = "Press Move (Scribble)"
                    gr.Markdown("<h3 style='text-align: center'> Step 1. Select Draw button </h3>")
                    gr.Markdown("<h3 style='text-align: center'> Step 2. {} </h3>".format(interactions))
                    scribble_bttn = gr.Button("Run")
                    scribble_reset_bttn = gr.Button("Reset Scribbles")
                    amg_scribble_bttn = gr.Button("Automatic Mask Generation")

                # Example image
                gr.Examples(get_examples(), inputs=[example_image])

            # with gr.Row():
            # Middle column: ZIM results.
            with gr.Column():
                with gr.Tab(label="ZIM Image"):
                    img_with_zim_mask = gr.Image(
                        label="ZIM Image",
                        interactive=False
                    )

                with gr.Tab(label="ZIM Mask"):
                    zim_mask = gr.Image(
                        label="ZIM Mask",
                        image_mode="L",
                        interactive=False
                    )
                with gr.Tab(label="ZIM Auto Mask"):
                    zim_amg = gr.Image(
                        label="ZIM Auto Mask",
                        interactive=False
                    )

            # Right column: SAM results, mirroring the ZIM column.
            with gr.Column():
                with gr.Tab(label="SAM Image"):
                    img_with_sam_mask = gr.Image(
                        label="SAM image",
                        interactive=False
                    )

                with gr.Tab(label="SAM Mask"):
                    sam_mask = gr.Image(
                        label="SAM Mask",
                        image_mode="L",
                        interactive=False
                    )

                with gr.Tab(label="SAM Auto Mask"):
                    sam_amg = gr.Image(
                        label="SAM Auto Mask",
                        interactive=False
                    )

        # Selecting an example resets every view; the ten outputs match the
        # ten values returned by reset_example_image, in order.
        example_image.change(
            reset_example_image,
            [example_image, prompts],
            [
                img,
                img_with_point_or_box,
                img_with_scribble,
                img_with_zim_mask,
                img_with_sam_mask,
                zim_amg,
                sam_amg,
                zim_mask,
                sam_mask,
                prompts,
            ]
        )

        # Uploading into the point/box prompter resets every other view; the
        # nine outputs match the nine values returned by reset_image.
        img_with_point_or_box.upload(
            reset_image,
            [img_with_point_or_box, prompts],
            [
                img,
                img_with_scribble,
                img_with_zim_mask,
                img_with_sam_mask,
                zim_amg,
                sam_amg,
                zim_mask,
                sam_mask,
                prompts,
            ],
        )

        # Both "Automatic Mask Generation" buttons run the same handler on
        # the hidden working image.
        amg_bttn.click(
            run_amg,
            [img],
            [zim_amg, sam_amg]
        )
        amg_scribble_bttn.click(
            run_amg,
            [img],
            [zim_amg, sam_amg]
        )

        # "Run" on the point/box tab: parse the prompts and predict both masks.
        run_bttn.click(
            get_point_or_box_prompts,
            [img_with_point_or_box, prompts],
            [img, zim_mask, sam_mask, prompts]
        )

        # Whenever a mask component changes, draw_images is re-run with that
        # mask; its two return values go to the hidden working image and to
        # the matching "Image" tab.
        zim_mask.change(
            draw_images,
            [img, zim_mask, prompts],
            [
                img, img_with_zim_mask,
            ],
        )
        sam_mask.change(
            draw_images,
            [img, sam_mask, prompts],
            [
                img, img_with_sam_mask,
            ],
        )

        # Scribble tab: "Reset Scribbles" clears the editor and both masks.
        scribble_reset_bttn.click(
            reset_scribble,
            [img, img_with_scribble, prompts],
            [img_with_scribble, zim_mask, sam_mask],
        )
        # Scribble tab: "Run" samples the scribble into prompts and predicts
        # both masks.
        scribble_bttn.click(
            update_scribble,
            [img, img_with_scribble, prompts],
            [zim_mask, sam_mask, prompts],
        )

    demo.queue()
    demo.launch()