dis-background-removal

Sleeping

App Files Files Community

petergpt committed on Feb 17

Commit

07d7c0a

verified ·

1 Parent(s): 66a61d0

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -47

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
 import cv2
 import gradio as gr
 import os
@@ -13,6 +12,7 @@ import warnings
 import time
 warnings.filterwarnings("ignore")
 os.system("git clone https://github.com/xuebinqin/DIS")
 os.system("mv DIS/IS-Net/* .")
@@ -36,10 +36,10 @@ class GOSNormalize(object):
         self.std = std
     def __call__(self,image):
-        image = normalize(image,self.mean,self.std)
         return image
-transform =  transforms.Compose([GOSNormalize([0.5,0.5,0.5],[1.0,1.0,1.0])])
 def load_image(im_path, hypar):
     im = im_reader(im_path)
@@ -59,88 +59,94 @@ def build_model(hypar, device):
                 layer.float()
     net.to(device)
-    if(hypar["restore_model"]!=""):
-        net.load_state_dict(torch.load(hypar["model_path"]+"/"+hypar["restore_model"], map_location=device))
         net.to(device)
-    net.eval()
     return net
 def predict(net, inputs_val, shapes_val, hypar, device):
     net.eval()
-    if(hypar["model_digit"]=="full"):
         inputs_val = inputs_val.type(torch.FloatTensor)
     else:
         inputs_val = inputs_val.type(torch.HalfTensor)
-    inputs_val_v = Variable(inputs_val, requires_grad=False).to(device)
     ds_val = net(inputs_val_v)[0]
-    pred_val = ds_val[0][0,:,:,:]
     pred_val = torch.squeeze(F.upsample(torch.unsqueeze(pred_val, 0),
                                         (shapes_val[0][0], shapes_val[0][1]),
                                         mode='bilinear'))
     ma = torch.max(pred_val)
     mi = torch.min(pred_val)
-    pred_val = (pred_val - mi) / (ma - mi + 1e-8)  # normalize to 0~1, +1e-8 to avoid div by zero
-    if device == 'cuda':
         torch.cuda.empty_cache()
     return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8)
 # Parameters
-hypar = {}
-hypar["model_path"] = "./saved_models"
-hypar["restore_model"] = "isnet.pth"
-hypar["interm_sup"] = False
-hypar["model_digit"] = "full"
-hypar["seed"] = 0
-hypar["cache_size"] = [1024, 1024]
-hypar["input_size"] = [1024, 1024]
-hypar["crop_size"] = [1024, 1024]
-hypar["model"] = ISNetDIS()
-# Build Model
 net = build_model(hypar, device)
-def inference(images, logs):
     start_time = time.time()
-    # If user didn't upload images, just return empty
-    if not images:
         return [], logs, logs
     processed_pairs = []
-    for img_path in images:
-        image_tensor, orig_size = load_image(img_path, hypar)
         mask = predict(net, image_tensor, orig_size, hypar, device)
         pil_mask = Image.fromarray(mask).convert('L')
-        im_rgb = Image.open(img_path).convert("RGB")
         im_rgba = im_rgb.copy()
         im_rgba.putalpha(pil_mask)
         processed_pairs.append([im_rgba, pil_mask])
     end_time = time.time()
     elapsed = round(end_time - start_time, 2)
-    # Flatten the list so that we can display all images in a single Gallery
     final_images = []
     for pair in processed_pairs:
         final_images.extend(pair)
-    # Update logs
-    logs = logs or ""
-    logs += f"Processed {len(processed_pairs)} image(s) in {elapsed} seconds.\n"
     return final_images, logs, logs
 title = "Highly Accurate Dichotomous Image Segmentation"
 description = (
-    "This is an unofficial demo for DIS, a model that can remove the background from a given image. "
-    "To use it, simply upload up to 3 images, or click one of the examples to load them. "
-    "Read more at the links below.<br>"
     "GitHub: https://github.com/xuebinqin/DIS<br>"
     "Telegram bot: https://t.me/restoration_photo_bot<br>"
     "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)"
@@ -152,22 +158,24 @@ article = (
 interface = gr.Interface(
     fn=inference,
-    inputs=[gr.Image(
-                type='filepath',
-                label='Images (up to 3)',
-                multiple=True,
-                max_count=3
-            ),
-            gr.State()],
     outputs=[
         gr.Gallery(label="Output (rgba + mask)"),
         gr.State(),
         gr.Textbox(label="Logs", lines=6)
     ],
-    examples=[['robot.png'], ['ship.png']],  # for multi-image examples, pass a list like ['robot.png','ship.png']
     title=title,
     description=description,
     article=article,
     flagging_mode="never",
-    cache_mode="lazy",
 ).queue().launch(show_api=True, show_error=True)

 import cv2
 import gradio as gr
 import os
 import time
 warnings.filterwarnings("ignore")
+# Clone the DIS repo and move contents (make sure this only happens once per session)
 os.system("git clone https://github.com/xuebinqin/DIS")
 os.system("mv DIS/IS-Net/* .")
         self.std = std
     def __call__(self,image):
+        image = normalize(image, self.mean, self.std)
         return image
+transform = transforms.Compose([GOSNormalize([0.5, 0.5, 0.5], [1.0, 1.0, 1.0])])
 def load_image(im_path, hypar):
     im = im_reader(im_path)
                 layer.float()
     net.to(device)
+    if hypar["restore_model"] != "":
+        net.load_state_dict(torch.load(os.path.join(hypar["model_path"], hypar["restore_model"]), map_location=device))
         net.to(device)
+    net.eval()
     return net
 def predict(net, inputs_val, shapes_val, hypar, device):
     """Run the network on one preprocessed input and return an 8-bit mask.

     Args:
         net: Model to call. ``net(x)[0][0]`` must be a 4-D tensor; only its
             first batch element is used.
         inputs_val: Input tensor passed to ``net`` (cast to float32 when
             ``hypar["model_digit"] == "full"``, otherwise to float16).
         shapes_val: Indexable whose ``[0][0]`` and ``[0][1]`` give the output
             height and width (plain ints or 0-d tensors are both accepted).
         hypar: Configuration dict; only ``"model_digit"`` is read here.
         device: Device string or ``torch.device`` the input is moved to.

     Returns:
         ``numpy.ndarray`` of dtype ``uint8``: the prediction resized to the
         requested size and min-max scaled to the 0..255 range.
     """
     net.eval()
     if hypar["model_digit"] == "full":
         inputs_val = inputs_val.float()
     else:
         inputs_val = inputs_val.half()
     # Coerce to ints so tensor-valued shapes work with F.interpolate.
     target_size = (int(shapes_val[0][0]), int(shapes_val[0][1]))
     # Inference only: skip autograd bookkeeping to save memory and time.
     with torch.no_grad():
         ds_val = net(inputs_val.to(device))[0]
         pred_val = ds_val[0][0, :, :, :]
         # F.upsample is deprecated; align_corners=False is the value it used.
         pred_val = torch.squeeze(
             F.interpolate(
                 torch.unsqueeze(pred_val, 0),
                 size=target_size,
                 mode="bilinear",
                 align_corners=False,
             )
         )
         ma = torch.max(pred_val)
         mi = torch.min(pred_val)
         # Normalize to [0, 1]; the epsilon avoids division by zero.
         pred_val = (pred_val - mi) / (ma - mi + 1e-8)
     # Normalising through torch.device also matches "cuda:0" and device objects.
     if torch.device(device).type == "cuda":
         torch.cuda.empty_cache()
     return (pred_val.cpu().numpy() * 255).astype(np.uint8)
 # Parameters
 # Configuration dict handed to build_model, load_image and predict.
+hypar = {
+    "model_path": "./saved_models",  # directory joined with restore_model when loading weights
+    "restore_model": "isnet.pth",  # checkpoint file name; an empty string skips weight loading
+    "interm_sup": False,  # NOTE(review): not read in the visible code; confirm where it is used
+    "model_digit": "full",  # "full" makes predict cast inputs to float32, anything else to float16
+    "seed": 0,  # NOTE(review): not read in the visible code
+    "cache_size": [1024, 1024],  # NOTE(review): presumably consumed by load_image; verify
+    "input_size": [1024, 1024],  # NOTE(review): presumably consumed by load_image; verify
+    "crop_size": [1024, 1024],  # NOTE(review): not read in the visible code
+    "model": ISNetDIS()
+}
+# Build the model
 # Runs once at import time; `net` is the module-level model used by inference().
 net = build_model(hypar, device)
+def inference(img1, img2, img3, logs):
+    """
+    Process up to 3 images in parallel (each can be None if not provided).
+    """
     start_time = time.time()
+    logs = logs or ""  # initialize logs if None
+    # Gather images into a list (filter out None)
+    image_paths = [i for i in [img1, img2, img3] if i is not None]
+    if not image_paths:
+        # No images were uploaded
+        logs += f"No images to process.\n"
         return [], logs, logs
     processed_pairs = []
+    for path in image_paths:
+        image_tensor, orig_size = load_image(path, hypar)
         mask = predict(net, image_tensor, orig_size, hypar, device)
         pil_mask = Image.fromarray(mask).convert('L')
+        im_rgb = Image.open(path).convert("RGB")
         im_rgba = im_rgb.copy()
         im_rgba.putalpha(pil_mask)
         processed_pairs.append([im_rgba, pil_mask])
     end_time = time.time()
     elapsed = round(end_time - start_time, 2)
+    # Flatten into final gallery list
     final_images = []
     for pair in processed_pairs:
         final_images.extend(pair)
+    logs += f"Processed {len(processed_pairs)} image(s) in {elapsed} second(s).\n"
+    # Return the flattened gallery, state, and logs text
     return final_images, logs, logs
 title = "Highly Accurate Dichotomous Image Segmentation"
 description = (
+    "This is an unofficial demo for DIS, a model that can remove the background from up to 3 images. "
+    "Simply upload 1 to 3 images, or use the example images. "
     "GitHub: https://github.com/xuebinqin/DIS<br>"
     "Telegram bot: https://t.me/restoration_photo_bot<br>"
     "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)"
 # Build the Gradio UI around inference(), enable queuing and launch it.
 # The input order must match inference(img1, img2, img3, logs) and the output
 # order must match its (gallery, logs, logs) return value.
 interface = gr.Interface(
     fn=inference,
+    inputs=[
+        gr.Image(type='filepath', label='Image 1'),
+        gr.Image(type='filepath', label='Image 2'),
+        gr.Image(type='filepath', label='Image 3'),
+        gr.State()  # receives the log text returned by the previous call
+    ],
     outputs=[
         gr.Gallery(label="Output (rgba + mask)"),
         gr.State(),  # stores the updated log text for the next call
         gr.Textbox(label="Logs", lines=6)
     ],
 # Each example row supplies one value per image input; None leaves a slot empty.
+    examples=[
+        ["robot.png", None, None],
+        ["robot.png", "ship.png", None],
+    ],
     title=title,
     description=description,
     article=article,
     flagging_mode="never",
+    cache_mode="lazy"
 ).queue().launch(show_api=True, show_error=True)