Spaces:

Saarthak2002
/

bg_removal

Paused

App Files Community

Saarthak2002 committed on Dec 10, 2024

Commit

2d2f43a

verified ·

1 Parent(s): f62b518

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -34

app.py CHANGED Viewed

@@ -1,11 +1,10 @@
 import torch
-from torchvision import transforms
 from PIL import Image
 import requests
 import numpy as np
 import gradio as gr
 from io import BytesIO
-from torchvision.models.segmentation import deeplabv3_resnet101
 import cv2
 # Step 1: Load the Image from URL
@@ -29,57 +28,56 @@ def crop_image(image, bounding_box):
     return image.crop((x_min, y_min, x_max, y_max))
 # Step 4: Preprocessing for Segmentation Model
-def preprocess_image(image, size=(1024, 1024)):
-    preprocess = transforms.Compose([
-        transforms.Resize(size),
-        transforms.ToTensor(),
-        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
     ])
-    return preprocess(image).unsqueeze(0)  # Add batch dimension
-# Step 5: Load a More Robust Pre-trained Model
 def load_model():
-    model = deeplabv3_resnet101(pretrained=True)  # Switch to ResNet101 for better feature extraction
     model.eval()  # Set the model to evaluation mode
     if torch.cuda.is_available():
         model = model.to("cuda")
     return model
-# Step 6: Perform Segmentation with Soft Masking
-def segment_image(model, input_tensor):
     if torch.cuda.is_available():
         input_tensor = input_tensor.to("cuda")
     with torch.no_grad():
-        output = model(input_tensor)['out']  # Model output
-        probabilities = torch.softmax(output, dim=1)  # Get class probabilities
-        mask = probabilities[0, 1].cpu().numpy()  # Assuming 1 corresponds to the object class
-        return mask
-# Step 7: Refine Mask and Extract Object
-def apply_mask(image, mask, threshold=0.75):
-    # Threshold the mask
-    mask = (mask > threshold).astype(np.uint8)
-    # Resize mask to the original image size
-    mask = cv2.resize(mask, image.size, interpolation=cv2.INTER_NEAREST)
-    # Apply morphological operations for a cleaner mask
-    kernel = np.ones((5, 5), np.uint8)
-    mask = cv2.dilate(mask, kernel, iterations=2)
-    mask = cv2.erode(mask, kernel, iterations=1)
     # Create RGBA image
     image_np = np.array(image)
     rgba_image = np.zeros((image_np.shape[0], image_np.shape[1], 4), dtype=np.uint8)
     rgba_image[..., :3] = image_np  # Copy RGB channels
-    rgba_image[..., 3] = mask * 255  # Alpha channel based on refined mask
     return Image.fromarray(rgba_image)
 # Gradio Interface to handle input and output
 def segment_object(image_url, x_min, y_min, x_max, y_max):
     bounding_box = adjust_bounding_box({"x_min": x_min, "y_min": y_min, "x_max": x_max, "y_max": y_max})
     # Load and process the image
     image = load_image(image_url)
     cropped_image = crop_image(image, bounding_box)
@@ -87,10 +85,10 @@ def segment_object(image_url, x_min, y_min, x_max, y_max):
     # Load model and perform segmentation
     model = load_model()
-    mask = segment_image(model, input_tensor)
-    # Apply mask to extract object
-    result_image = apply_mask(cropped_image, mask)
     return result_image
 # Set up the Gradio Interface

 import torch
+from torchvision import models, transforms
 from PIL import Image
 import requests
 import numpy as np
 import gradio as gr
 from io import BytesIO
 import cv2
 # Step 1: Load the Image from URL
     return image.crop((x_min, y_min, x_max, y_max))
 # Step 4: Preprocessing for Segmentation Model
+def preprocess_image(image):
+    transform = transforms.Compose([
+        transforms.ToTensor(),  # Convert to Tensor
     ])
+    return transform(image).unsqueeze(0)  # Add batch dimension
+# Step 5: Load Mask R-CNN Model
 def load_model():
+    model = models.detection.maskrcnn_resnet50_fpn(pretrained=True)  # Pre-trained Mask R-CNN
     model.eval()  # Set the model to evaluation mode
     if torch.cuda.is_available():
         model = model.to("cuda")
     return model
+# Step 6: Perform Object Segmentation
+def segment_image(model, input_tensor, confidence_threshold=0.6):
     if torch.cuda.is_available():
         input_tensor = input_tensor.to("cuda")
     with torch.no_grad():
+        outputs = model(input_tensor)  # Perform inference
+    # Process results: filter by confidence and get masks
+    scores = outputs[0]["scores"].cpu().numpy()
+    masks = outputs[0]["masks"].cpu().numpy()
+    boxes = outputs[0]["boxes"].cpu().numpy()
+    # Filter masks based on confidence threshold
+    filtered_masks = [masks[i, 0] for i in range(len(scores)) if scores[i] > confidence_threshold]
+    return filtered_masks
+# Step 7: Combine Masks and Extract Object
+def apply_masks(image, masks):
+    combined_mask = np.zeros((image.height, image.width), dtype=np.uint8)
+    for mask in masks:
+        resized_mask = cv2.resize(mask, (image.width, image.height), interpolation=cv2.INTER_NEAREST)
+        combined_mask = np.maximum(combined_mask, (resized_mask > 0.5).astype(np.uint8))  # Combine masks
     # Create RGBA image
     image_np = np.array(image)
     rgba_image = np.zeros((image_np.shape[0], image_np.shape[1], 4), dtype=np.uint8)
     rgba_image[..., :3] = image_np  # Copy RGB channels
+    rgba_image[..., 3] = combined_mask * 255  # Alpha channel based on combined mask
     return Image.fromarray(rgba_image)
 # Gradio Interface to handle input and output
 def segment_object(image_url, x_min, y_min, x_max, y_max):
     bounding_box = adjust_bounding_box({"x_min": x_min, "y_min": y_min, "x_max": x_max, "y_max": y_max})
     # Load and process the image
     image = load_image(image_url)
     cropped_image = crop_image(image, bounding_box)
     # Load model and perform segmentation
     model = load_model()
+    masks = segment_image(model, input_tensor)
+    # Apply masks to extract objects
+    result_image = apply_masks(cropped_image, masks)
     return result_image
 # Set up the Gradio Interface