dis-background-removal

Sleeping

App Files Community

petergpt committed on Feb 17

Commit

3472d22

verified ·

1 Parent(s): 07d7c0a

multiple upload

Browse files

Files changed (1) hide show

app.py +26 -49

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ import warnings
 import time
 warnings.filterwarnings("ignore")
-# Clone the DIS repo and move contents (make sure this only happens once per session)
 os.system("git clone https://github.com/xuebinqin/DIS")
 os.system("mv DIS/IS-Net/* .")
@@ -22,22 +22,21 @@ from models import *
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
-# Download official weights
 if not os.path.exists("saved_models"):
     os.mkdir("saved_models")
     os.system("mv isnet.pth saved_models/")
 class GOSNormalize(object):
-    '''
-    Normalize the Image using torch.transforms
-    '''
-    def __init__(self, mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]):
         self.mean = mean
         self.std = std
-    def __call__(self,image):
-        image = normalize(image, self.mean, self.std)
-        return image
 transform = transforms.Compose([GOSNormalize([0.5, 0.5, 0.5], [1.0, 1.0, 1.0])])
@@ -50,14 +49,11 @@ def load_image(im_path, hypar):
 def build_model(hypar, device):
     net = hypar["model"]
-    # convert to half precision if needed
-    if(hypar["model_digit"]=="half"):
         net.half()
         for layer in net.modules():
-            if isinstance(layer, nn.BatchNorm2d):
                 layer.float()
     net.to(device)
     if hypar["restore_model"] != "":
         net.load_state_dict(torch.load(os.path.join(hypar["model_path"], hypar["restore_model"]), map_location=device))
@@ -67,24 +63,19 @@ def build_model(hypar, device):
 def predict(net, inputs_val, shapes_val, hypar, device):
     net.eval()
     if hypar["model_digit"] == "full":
         inputs_val = inputs_val.type(torch.FloatTensor)
     else:
         inputs_val = inputs_val.type(torch.HalfTensor)
     inputs_val_v = Variable(inputs_val, requires_grad=False).to(device)
     ds_val = net(inputs_val_v)[0]
     pred_val = ds_val[0][0, :, :, :]
     pred_val = torch.squeeze(F.upsample(torch.unsqueeze(pred_val, 0),
-                                        (shapes_val[0][0], shapes_val[0][1]),
-                                        mode='bilinear'))
     ma = torch.max(pred_val)
     mi = torch.min(pred_val)
-    # normalize to [0, 1], add a small epsilon to avoid division by zero
     pred_val = (pred_val - mi) / (ma - mi + 1e-8)
     if device == 'cuda':
         torch.cuda.empty_cache()
     return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8)
@@ -102,51 +93,39 @@ hypar = {
     "model": ISNetDIS()
 }
-# Build the model
 net = build_model(hypar, device)
-def inference(img1, img2, img3, logs):
     """
-    Process up to 3 images in parallel (each can be None if not provided).
     """
     start_time = time.time()
-    logs = logs or ""  # initialize logs if None
-    # Gather images into a list (filter out None)
-    image_paths = [i for i in [img1, img2, img3] if i is not None]
-    if not image_paths:
-        # No images were uploaded
-        logs += f"No images to process.\n"
         return [], logs, logs
     processed_pairs = []
     for path in image_paths:
         image_tensor, orig_size = load_image(path, hypar)
         mask = predict(net, image_tensor, orig_size, hypar, device)
         pil_mask = Image.fromarray(mask).convert('L')
         im_rgb = Image.open(path).convert("RGB")
         im_rgba = im_rgb.copy()
         im_rgba.putalpha(pil_mask)
         processed_pairs.append([im_rgba, pil_mask])
-    end_time = time.time()
-    elapsed = round(end_time - start_time, 2)
-    # Flatten into final gallery list
-    final_images = []
-    for pair in processed_pairs:
-        final_images.extend(pair)
     logs += f"Processed {len(processed_pairs)} image(s) in {elapsed} second(s).\n"
-    # Return the flattened gallery, state, and logs text
     return final_images, logs, logs
 title = "Highly Accurate Dichotomous Image Segmentation"
 description = (
-    "This is an unofficial demo for DIS, a model that can remove the background from up to 3 images. "
-    "Simply upload 1 to 3 images, or use the example images. "
     "GitHub: https://github.com/xuebinqin/DIS<br>"
     "Telegram bot: https://t.me/restoration_photo_bot<br>"
     "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)"
@@ -159,9 +138,7 @@ article = (
 interface = gr.Interface(
     fn=inference,
     inputs=[
-        gr.Image(type='filepath', label='Image 1'),
-        gr.Image(type='filepath', label='Image 2'),
-        gr.Image(type='filepath', label='Image 3'),
         gr.State()
     ],
     outputs=[
@@ -170,8 +147,8 @@ interface = gr.Interface(
         gr.Textbox(label="Logs", lines=6)
     ],
     examples=[
-        ["robot.png", None, None],
-        ["robot.png", "ship.png", None],
     ],
     title=title,
     description=description,

 import time
 warnings.filterwarnings("ignore")
+# Clone the DIS repo and move contents (ensure this runs once per session)
 os.system("git clone https://github.com/xuebinqin/DIS")
 os.system("mv DIS/IS-Net/* .")
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
+# Download official weights if not already present
 if not os.path.exists("saved_models"):
     os.mkdir("saved_models")
     os.system("mv isnet.pth saved_models/")
 class GOSNormalize(object):
+    """
+    Normalize the Image using torch.transforms.
+    """
+    def __init__(self, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
         self.mean = mean
         self.std = std
+    def __call__(self, image):
+        return normalize(image, self.mean, self.std)
 transform = transforms.Compose([GOSNormalize([0.5, 0.5, 0.5], [1.0, 1.0, 1.0])])
 def build_model(hypar, device):
     net = hypar["model"]
+    if hypar["model_digit"] == "half":
         net.half()
         for layer in net.modules():
+            if isinstance(layer, torch.nn.BatchNorm2d):
                 layer.float()
     net.to(device)
     if hypar["restore_model"] != "":
         net.load_state_dict(torch.load(os.path.join(hypar["model_path"], hypar["restore_model"]), map_location=device))
 def predict(net, inputs_val, shapes_val, hypar, device):
     net.eval()
     if hypar["model_digit"] == "full":
         inputs_val = inputs_val.type(torch.FloatTensor)
     else:
         inputs_val = inputs_val.type(torch.HalfTensor)
     inputs_val_v = Variable(inputs_val, requires_grad=False).to(device)
     ds_val = net(inputs_val_v)[0]
     pred_val = ds_val[0][0, :, :, :]
     pred_val = torch.squeeze(F.upsample(torch.unsqueeze(pred_val, 0),
+                                          (shapes_val[0][0], shapes_val[0][1]),
+                                          mode='bilinear'))
     ma = torch.max(pred_val)
     mi = torch.min(pred_val)
     pred_val = (pred_val - mi) / (ma - mi + 1e-8)
     if device == 'cuda':
         torch.cuda.empty_cache()
     return (pred_val.detach().cpu().numpy() * 255).astype(np.uint8)
     "model": ISNetDIS()
 }
 net = build_model(hypar, device)
+def inference(file_paths, logs):
     """
+    Process up to 3 images uploaded via the file uploader.
     """
     start_time = time.time()
+    logs = logs or ""
+    if not file_paths:
+        logs += "No images to process.\n"
         return [], logs, logs
+    # Limit to a maximum of 3 images
+    image_paths = file_paths[:3]
     processed_pairs = []
     for path in image_paths:
         image_tensor, orig_size = load_image(path, hypar)
         mask = predict(net, image_tensor, orig_size, hypar, device)
         pil_mask = Image.fromarray(mask).convert('L')
         im_rgb = Image.open(path).convert("RGB")
         im_rgba = im_rgb.copy()
         im_rgba.putalpha(pil_mask)
         processed_pairs.append([im_rgba, pil_mask])
+    elapsed = round(time.time() - start_time, 2)
+    final_images = [img for pair in processed_pairs for img in pair]
     logs += f"Processed {len(processed_pairs)} image(s) in {elapsed} second(s).\n"
     return final_images, logs, logs
 title = "Highly Accurate Dichotomous Image Segmentation"
 description = (
+    "This is an unofficial demo for DIS, a model that removes the background from images. "
+    "Upload up to 3 images at once using the file uploader below. "
     "GitHub: https://github.com/xuebinqin/DIS<br>"
     "Telegram bot: https://t.me/restoration_photo_bot<br>"
     "[![](https://img.shields.io/twitter/follow/DoEvent?label=@DoEvent&style=social)](https://twitter.com/DoEvent)"
 interface = gr.Interface(
     fn=inference,
     inputs=[
+        gr.File(file_count="multiple", type="filepath", label="Upload Images (up to 3)"),
         gr.State()
     ],
     outputs=[
         gr.Textbox(label="Logs", lines=6)
     ],
     examples=[
+        [["robot.png"], None],
+        [["robot.png", "ship.png"], None],
     ],
     title=title,
     description=description,