Spaces:

Saarthak2002
/

bg_removal

Paused

App Files Files Community

Saarthak2002 committed on Dec 10, 2024

Commit

ace5a98

verified ·

1 Parent(s): 7990efc

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -12

app.py CHANGED Viewed

@@ -5,7 +5,8 @@ import requests
 import numpy as np
 import gradio as gr
 from io import BytesIO
-from torchvision.models.segmentation import deeplabv3_resnet50
 # Step 1: Load the Image from URL
 def load_image(url):
@@ -18,8 +19,8 @@ def crop_image(image, bounding_box):
     x_min, y_min, x_max, y_max = bounding_box.values()
     return image.crop((x_min, y_min, x_max, y_max))
-# Step 3: Preprocessing for U-Net (DeepLabV3 in this case)
-def preprocess_image(image, size=(256, 256)):
     preprocess = transforms.Compose([
         transforms.Resize(size),
         transforms.ToTensor(),
@@ -27,29 +28,38 @@ def preprocess_image(image, size=(256, 256)):
     ])
     return preprocess(image).unsqueeze(0)  # Add batch dimension
-# Step 4: Load Pre-trained Segmentation Model (DeepLabV3)
 def load_model():
-    model = deeplabv3_resnet50(pretrained=True)
     model.eval()  # Set the model to evaluation mode
     return model
 # Step 5: Perform Segmentation
 def segment_image(model, input_tensor):
     with torch.no_grad():
         output = model(input_tensor)['out']  # Model output
         mask = output.argmax(dim=1).squeeze().cpu().numpy()  # Get segmentation mask
         return mask
-# Step 6: Postprocess and Extract Object
-def apply_mask(image, mask, threshold=1):
-    mask_resized = Image.fromarray((mask * 255).astype(np.uint8)).resize(image.size, Image.NEAREST)
-    mask_resized = np.array(mask_resized) > threshold
-    image_np = np.array(image)
-    # Create RGBA image with transparency
     rgba_image = np.zeros((image_np.shape[0], image_np.shape[1], 4), dtype=np.uint8)
     rgba_image[..., :3] = image_np  # Copy RGB channels
-    rgba_image[..., 3] = mask_resized.astype(np.uint8) * 255  # Alpha channel based on mask
     return Image.fromarray(rgba_image)
 # Gradio Interface to handle input and output
@@ -85,3 +95,4 @@ iface = gr.Interface(
 # Launch the interface
 iface.launch()

 import numpy as np
 import gradio as gr
 from io import BytesIO
+from torchvision.models.segmentation import deeplabv3_resnet101
+import cv2
 # Step 1: Load the Image from URL
 def load_image(url):
     x_min, y_min, x_max, y_max = bounding_box.values()
     return image.crop((x_min, y_min, x_max, y_max))
+# Step 3: Preprocessing for Segmentation Model
+def preprocess_image(image, size=(512, 512)):
     preprocess = transforms.Compose([
         transforms.Resize(size),
         transforms.ToTensor(),
     ])
     return preprocess(image).unsqueeze(0)  # Add batch dimension
+# Step 4: Load a More Robust Pre-trained Model
 def load_model():
     """Build the pretrained DeepLabV3-ResNet101 segmentation network.

     The network is put into evaluation mode and, when CUDA is available,
     moved onto the GPU.

     Returns:
         The ready-to-use model.
     """
     # NOTE(review): newer torchvision releases deprecate `pretrained=True`
     # in favour of `weights=...` -- confirm against the installed version
     # before changing it.
     net = deeplabv3_resnet101(pretrained=True)
     net.eval()  # inference only
     return net.to("cuda") if torch.cuda.is_available() else net
 # Step 5: Perform Segmentation
 def segment_image(model, input_tensor):
     """Run the segmentation model and return a per-pixel class-index mask.

     Args:
         model: A ``torch.nn.Module`` whose forward pass returns a mapping
             with an ``'out'`` entry holding per-class scores.
         input_tensor: Batched input tensor for the model. The returned mask
             is 2-D only for a batch of one, since ``squeeze()`` drops the
             size-1 batch axis.

     Returns:
         A NumPy array with the arg-max class index for every pixel.
     """
     # Send the input to wherever the model's weights actually live, rather
     # than to CUDA whenever CUDA exists: a CPU-resident model on a CUDA
     # machine would otherwise fail with a device mismatch.
     first_param = next(model.parameters(), None)
     if first_param is not None:
         input_tensor = input_tensor.to(first_param.device)

     with torch.no_grad():
         scores = model(input_tensor)['out']

     # Pick the highest-scoring class along the channel axis.
     return scores.argmax(dim=1).squeeze().cpu().numpy()
+# Step 6: Refine Mask and Extract Object
def apply_mask(image, mask):
    """Cut the segmented foreground out of ``image`` as an RGBA picture.

    Args:
        image: PIL image to extract the object from. It is converted to RGB
            so that grayscale or RGBA inputs fit the three colour channels.
        mask: 2-D array of per-pixel class indices. Any non-zero index is
            treated as foreground (assumes index 0 is the background class,
            as the previous thresholding version did -- confirm for the
            model in use).

    Returns:
        A PIL RGBA image whose alpha channel is 255 on the refined
        foreground and 0 elsewhere.
    """
    # Collapse class indices to 0/1 before anything else. Multiplying raw
    # indices by 255 in uint8 wraps around (class 15 -> 241), which left the
    # foreground partly transparent.
    foreground = (np.asarray(mask) > 0).astype(np.uint8)
    foreground = cv2.resize(foreground, image.size, interpolation=cv2.INTER_NEAREST)

    # Dilate then erode (morphological closing) to fill small holes.
    kernel = np.ones((5, 5), np.uint8)
    foreground = cv2.dilate(foreground, kernel, iterations=1)
    foreground = cv2.erode(foreground, kernel, iterations=1)

    # Assemble the RGBA result: colour from the image, alpha from the mask.
    image_np = np.array(image.convert("RGB"))
    rgba_image = np.zeros((image_np.shape[0], image_np.shape[1], 4), dtype=np.uint8)
    rgba_image[..., :3] = image_np
    rgba_image[..., 3] = foreground * 255
    return Image.fromarray(rgba_image)
 # Gradio Interface to handle input and output
 # Launch the interface
 iface.launch()