Saarthak2002 committed on
Commit
2d2f43a
·
verified ·
1 Parent(s): f62b518

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -34
app.py CHANGED
@@ -1,11 +1,10 @@
1
  import torch
2
- from torchvision import transforms
3
  from PIL import Image
4
  import requests
5
  import numpy as np
6
  import gradio as gr
7
  from io import BytesIO
8
- from torchvision.models.segmentation import deeplabv3_resnet101
9
  import cv2
10
 
11
  # Step 1: Load the Image from URL
@@ -29,57 +28,56 @@ def crop_image(image, bounding_box):
29
  return image.crop((x_min, y_min, x_max, y_max))
30
 
31
  # Step 4: Preprocessing for Segmentation Model
32
- def preprocess_image(image, size=(1024, 1024)):
33
- preprocess = transforms.Compose([
34
- transforms.Resize(size),
35
- transforms.ToTensor(),
36
- transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
37
  ])
38
- return preprocess(image).unsqueeze(0) # Add batch dimension
39
 
40
- # Step 5: Load a More Robust Pre-trained Model
41
  def load_model():
42
- model = deeplabv3_resnet101(pretrained=True) # Switch to ResNet101 for better feature extraction
43
  model.eval() # Set the model to evaluation mode
44
  if torch.cuda.is_available():
45
  model = model.to("cuda")
46
  return model
47
 
48
- # Step 6: Perform Segmentation with Soft Masking
49
- def segment_image(model, input_tensor):
50
  if torch.cuda.is_available():
51
  input_tensor = input_tensor.to("cuda")
52
  with torch.no_grad():
53
- output = model(input_tensor)['out'] # Model output
54
- probabilities = torch.softmax(output, dim=1) # Get class probabilities
55
- mask = probabilities[0, 1].cpu().numpy() # Assuming 1 corresponds to the object class
56
- return mask
57
-
58
- # Step 7: Refine Mask and Extract Object
59
- def apply_mask(image, mask, threshold=0.75):
60
- # Threshold the mask
61
- mask = (mask > threshold).astype(np.uint8)
62
-
63
- # Resize mask to the original image size
64
- mask = cv2.resize(mask, image.size, interpolation=cv2.INTER_NEAREST)
65
-
66
- # Apply morphological operations for a cleaner mask
67
- kernel = np.ones((5, 5), np.uint8)
68
- mask = cv2.dilate(mask, kernel, iterations=2)
69
- mask = cv2.erode(mask, kernel, iterations=1)
 
70
 
71
  # Create RGBA image
72
  image_np = np.array(image)
73
  rgba_image = np.zeros((image_np.shape[0], image_np.shape[1], 4), dtype=np.uint8)
74
  rgba_image[..., :3] = image_np # Copy RGB channels
75
- rgba_image[..., 3] = mask * 255 # Alpha channel based on refined mask
76
 
77
  return Image.fromarray(rgba_image)
78
 
79
  # Gradio Interface to handle input and output
80
  def segment_object(image_url, x_min, y_min, x_max, y_max):
81
  bounding_box = adjust_bounding_box({"x_min": x_min, "y_min": y_min, "x_max": x_max, "y_max": y_max})
82
-
83
  # Load and process the image
84
  image = load_image(image_url)
85
  cropped_image = crop_image(image, bounding_box)
@@ -87,10 +85,10 @@ def segment_object(image_url, x_min, y_min, x_max, y_max):
87
 
88
  # Load model and perform segmentation
89
  model = load_model()
90
- mask = segment_image(model, input_tensor)
91
 
92
- # Apply mask to extract object
93
- result_image = apply_mask(cropped_image, mask)
94
  return result_image
95
 
96
  # Set up the Gradio Interface
 
1
  import torch
2
+ from torchvision import models, transforms
3
  from PIL import Image
4
  import requests
5
  import numpy as np
6
  import gradio as gr
7
  from io import BytesIO
 
8
  import cv2
9
 
10
  # Step 1: Load the Image from URL
 
28
  return image.crop((x_min, y_min, x_max, y_max))
29
 
30
  # Step 4: Preprocessing for Segmentation Model
31
def preprocess_image(image):
    """Turn a PIL image into a batched tensor suitable for the segmentation model.

    Args:
        image: PIL image in any mode.

    Returns:
        A float tensor with a leading batch dimension of size 1
        (``ToTensor`` output followed by ``unsqueeze(0)``).
    """
    # The detection model expects three colour channels; images downloaded
    # from arbitrary URLs may be RGBA, palette or grayscale, so normalise here.
    if image.mode != "RGB":
        image = image.convert("RGB")

    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert to Tensor
    ])
    return transform(image).unsqueeze(0)  # Add batch dimension
36
 
37
+ # Step 5: Load Mask R-CNN Model
38
  def load_model():
39
+ model = models.detection.maskrcnn_resnet50_fpn(pretrained=True) # Pre-trained Mask R-CNN
40
  model.eval() # Set the model to evaluation mode
41
  if torch.cuda.is_available():
42
  model = model.to("cuda")
43
  return model
44
 
45
+ # Step 6: Perform Object Segmentation
46
def segment_image(model, input_tensor, confidence_threshold=0.6):
    """Run the model on one batched image and keep the confident instance masks.

    Args:
        model: Callable returning a list with one dict per image; the dict
            must contain ``"scores"`` and ``"masks"`` tensors (the format
            produced by torchvision's Mask R-CNN).
        input_tensor: Batched image tensor; only the first image's result
            is used.
        confidence_threshold: Detections whose score is not strictly greater
            than this value are discarded.

    Returns:
        A list of NumPy arrays, one per kept detection, each being channel 0
        of that detection's mask. The list is empty when nothing passes the
        threshold.
    """
    if torch.cuda.is_available():
        input_tensor = input_tensor.to("cuda")
    with torch.no_grad():
        outputs = model(input_tensor)  # Perform inference

    # Only the first (and only) image of the batch is processed.
    detections = outputs[0]
    scores = detections["scores"].cpu().numpy()
    masks = detections["masks"].cpu().numpy()

    # Each mask carries a singleton channel axis; index 0 drops it.
    return [
        mask[0]
        for mask, score in zip(masks, scores)
        if score > confidence_threshold
    ]
60
+
61
+ # Step 7: Combine Masks and Extract Object
62
def apply_masks(image, masks):
    """Merge instance masks and return the image with them as its alpha channel.

    Args:
        image: PIL image the masks refer to (any mode; converted to RGB).
        masks: Iterable of 2-D float arrays; values above 0.5 count as
            foreground.

    Returns:
        An RGBA PIL image whose alpha is 255 wherever at least one mask is
        set and 0 everywhere else. With no masks the result is fully
        transparent.
    """
    # The RGB copy below requires exactly three channels; RGBA or grayscale
    # input would otherwise fail on the channel assignment.
    rgb_image = image if image.mode == "RGB" else image.convert("RGB")
    width, height = rgb_image.size
    combined_mask = np.zeros((height, width), dtype=np.uint8)

    for mask in masks:
        # cv2.resize takes (width, height); skip it when the mask already
        # matches the image, since a same-size nearest resize is a no-op.
        if mask.shape != (height, width):
            mask = cv2.resize(mask, (width, height), interpolation=cv2.INTER_NEAREST)
        combined_mask = np.maximum(combined_mask, (mask > 0.5).astype(np.uint8))  # Combine masks

    # Create RGBA image
    image_np = np.array(rgb_image)
    rgba_image = np.zeros((image_np.shape[0], image_np.shape[1], 4), dtype=np.uint8)
    rgba_image[..., :3] = image_np  # Copy RGB channels
    rgba_image[..., 3] = combined_mask * 255  # Alpha channel based on combined mask

    return Image.fromarray(rgba_image)
76
 
77
  # Gradio Interface to handle input and output
78
  def segment_object(image_url, x_min, y_min, x_max, y_max):
79
  bounding_box = adjust_bounding_box({"x_min": x_min, "y_min": y_min, "x_max": x_max, "y_max": y_max})
80
+
81
  # Load and process the image
82
  image = load_image(image_url)
83
  cropped_image = crop_image(image, bounding_box)
 
85
 
86
  # Load model and perform segmentation
87
  model = load_model()
88
+ masks = segment_image(model, input_tensor)
89
 
90
+ # Apply masks to extract objects
91
+ result_image = apply_masks(cropped_image, masks)
92
  return result_image
93
 
94
  # Set up the Gradio Interface