Spaces:

Saarthak2002
/

bg_removal

Paused

App Files Files Community

Saarthak2002 committed on Dec 10, 2024

Commit

975418f

verified ·

1 Parent(s): ce52e71

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -14

app.py CHANGED Viewed

@@ -14,13 +14,22 @@ def load_image(url):
     image = Image.open(BytesIO(response.content)).convert("RGB")
     return image
-# Step 2: Crop Image Based on Bounding Box
 def crop_image(image, bounding_box):
     x_min, y_min, x_max, y_max = bounding_box.values()
     return image.crop((x_min, y_min, x_max, y_max))
-# Step 3: Preprocessing for Segmentation Model
-def preprocess_image(image, size=(512, 512)):
     preprocess = transforms.Compose([
         transforms.Resize(size),
         transforms.ToTensor(),
@@ -28,7 +37,7 @@ def preprocess_image(image, size=(512, 512)):
     ])
     return preprocess(image).unsqueeze(0)  # Add batch dimension
-# Step 4: Load a More Robust Pre-trained Model
 def load_model():
     model = deeplabv3_resnet101(pretrained=True)  # Switch to ResNet101 for better feature extraction
     model.eval()  # Set the model to evaluation mode
@@ -36,22 +45,27 @@ def load_model():
         model = model.to("cuda")
     return model
-# Step 5: Perform Segmentation
 def segment_image(model, input_tensor):
     if torch.cuda.is_available():
         input_tensor = input_tensor.to("cuda")
     with torch.no_grad():
         output = model(input_tensor)['out']  # Model output
-        mask = output.argmax(dim=1).squeeze().cpu().numpy()  # Get segmentation mask
         return mask
-# Step 6: Refine Mask and Extract Object
-def apply_mask(image, mask):
-    mask = cv2.resize(mask.astype(np.uint8), image.size, interpolation=cv2.INTER_NEAREST)
-    # Apply morphological operations for cleaner mask
     kernel = np.ones((5, 5), np.uint8)
-    mask = cv2.dilate(mask, kernel, iterations=1)
     mask = cv2.erode(mask, kernel, iterations=1)
     # Create RGBA image
@@ -64,7 +78,7 @@ def apply_mask(image, mask):
 # Gradio Interface to handle input and output
 def segment_object(image_url, x_min, y_min, x_max, y_max):
-    bounding_box = {"x_min": x_min, "y_min": y_min, "x_max": x_max, "y_max": y_max}
     # Load and process the image
     image = load_image(image_url)
@@ -95,4 +109,3 @@ iface = gr.Interface(
 # Launch the interface
 iface.launch()

     image = Image.open(BytesIO(response.content)).convert("RGB")
     return image
+# Step 2: Adjust Bounding Box to Add Margin
+def adjust_bounding_box(bounding_box, margin=20):
+    return {
+        "x_min": max(0, bounding_box["x_min"] - margin),
+        "y_min": max(0, bounding_box["y_min"] - margin),
+        "x_max": bounding_box["x_max"] + margin,
+        "y_max": bounding_box["y_max"] + margin,
+    }
+# Step 3: Crop Image Based on Bounding Box
 def crop_image(image, bounding_box):
     x_min, y_min, x_max, y_max = bounding_box.values()
     return image.crop((x_min, y_min, x_max, y_max))
+# Step 4: Preprocessing for Segmentation Model
+def preprocess_image(image, size=(1024, 1024)):
     preprocess = transforms.Compose([
         transforms.Resize(size),
         transforms.ToTensor(),
     ])
     return preprocess(image).unsqueeze(0)  # Add batch dimension
+# Step 5: Load a More Robust Pre-trained Model
 def load_model():
     model = deeplabv3_resnet101(pretrained=True)  # Switch to ResNet101 for better feature extraction
     model.eval()  # Set the model to evaluation mode
         model = model.to("cuda")
     return model
+# Step 6: Perform Segmentation with Soft Masking
 def segment_image(model, input_tensor):
     if torch.cuda.is_available():
         input_tensor = input_tensor.to("cuda")
     with torch.no_grad():
         output = model(input_tensor)['out']  # Model output
+        probabilities = torch.softmax(output, dim=1)  # Get class probabilities
+        mask = probabilities[0, 1].cpu().numpy()  # Assuming 1 corresponds to the object class
         return mask
+# Step 7: Refine Mask and Extract Object
+def apply_mask(image, mask, threshold=0.5):
+    # Threshold the mask
+    mask = (mask > threshold).astype(np.uint8)
+    # Resize mask to the original image size
+    mask = cv2.resize(mask, image.size, interpolation=cv2.INTER_NEAREST)
+    # Apply morphological operations for a cleaner mask
     kernel = np.ones((5, 5), np.uint8)
+    mask = cv2.dilate(mask, kernel, iterations=2)
     mask = cv2.erode(mask, kernel, iterations=1)
     # Create RGBA image
 # Gradio Interface to handle input and output
 def segment_object(image_url, x_min, y_min, x_max, y_max):
+    bounding_box = adjust_bounding_box({"x_min": x_min, "y_min": y_min, "x_max": x_max, "y_max": y_max})
     # Load and process the image
     image = load_image(image_url)
 # Launch the interface
 iface.launch()