Spaces:

Saarthak2002
/

bg_removal

Paused

App Files Files Community

Saarthak2002 committed on Dec 9, 2024

Commit

b07764a

verified ·

1 Parent(s): c4d88ec

Update main.py

Browse files

Files changed (1) hide show

main.py +87 -0

main.py CHANGED Viewed

	@@ -0,0 +1,87 @@

+import torch
+from torchvision import transforms
+from PIL import Image
+import requests
+import numpy as np
+import gradio as gr
+from io import BytesIO
+from torchvision.models.segmentation import deeplabv3_resnet50
+# Step 1: Load the Image from URL
+def load_image(url):
+    response = requests.get(url)
+    image = Image.open(BytesIO(response.content)).convert("RGB")
+    return image
+# Step 2: Crop Image Based on Bounding Box
+def crop_image(image, bounding_box):
+    x_min, y_min, x_max, y_max = bounding_box.values()
+    return image.crop((x_min, y_min, x_max, y_max))
+# Step 3: Preprocessing for U-Net (DeepLabV3 in this case)
+def preprocess_image(image, size=(256, 256)):
+    preprocess = transforms.Compose([
+        transforms.Resize(size),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ])
+    return preprocess(image).unsqueeze(0)  # Add batch dimension
+# Step 4: Load Pre-trained Segmentation Model (DeepLabV3)
+def load_model():
+    model = deeplabv3_resnet50(pretrained=True)
+    model.eval()  # Set the model to evaluation mode
+    return model
+# Step 5: Perform Segmentation
+def segment_image(model, input_tensor):
+    with torch.no_grad():
+        output = model(input_tensor)['out']  # Model output
+        mask = output.argmax(dim=1).squeeze().cpu().numpy()  # Get segmentation mask
+        return mask
+# Step 6: Postprocess and Extract Object
+def apply_mask(image, mask, threshold=1):
+    mask_resized = Image.fromarray((mask * 255).astype(np.uint8)).resize(image.size, Image.NEAREST)
+    mask_resized = np.array(mask_resized) > threshold
+    image_np = np.array(image)
+    # Create RGBA image with transparency
+    rgba_image = np.zeros((image_np.shape[0], image_np.shape[1], 4), dtype=np.uint8)
+    rgba_image[..., :3] = image_np  # Copy RGB channels
+    rgba_image[..., 3] = mask_resized.astype(np.uint8) * 255  # Alpha channel based on mask
+    return Image.fromarray(rgba_image)
+# Gradio Interface to handle input and output
+def segment_object(image_url, x_min, y_min, x_max, y_max):
+    bounding_box = {"x_min": x_min, "y_min": y_min, "x_max": x_max, "y_max": y_max}
+    # Load and process the image
+    image = load_image(image_url)
+    cropped_image = crop_image(image, bounding_box)
+    input_tensor = preprocess_image(cropped_image)
+    # Load model and perform segmentation
+    model = load_model()
+    mask = segment_image(model, input_tensor)
+    # Apply mask to extract object
+    result_image = apply_mask(cropped_image, mask)
+    return result_image
+# Set up the Gradio Interface
+iface = gr.Interface(
+    fn=segment_object,
+    inputs=[
+        gr.Textbox(label="Image URL", placeholder="Enter image URL..."),
+        gr.Number(label="x_min", value=100),
+        gr.Number(label="y_min", value=100),
+        gr.Number(label="x_max", value=600),
+        gr.Number(label="y_max", value=400),
+    ],
+    outputs=gr.Image(label="Segmented Image"),
+    live=True
+)
+# Launch the interface
+iface.launch()