aagoluoglu committed on
Commit d5f906d · verified · 1 Parent(s): ace1b10

Update app.py

Files changed (1)
  1. app.py +82 -18
app.py CHANGED
@@ -51,6 +51,7 @@ app_ui = ui.page_fillable(
        ui.input_switch("show_margins", "Show marginal plots", value=True),
    ),
    ui.output_image("uploaded_image"),  # display the uploaded TIFF sidewalk tile image
+   ui.output_text("processed_output"),
    ui.output_ui("value_boxes"),
    ui.output_plot("scatter", fill=True),
    ui.help_text(
@@ -70,7 +71,29 @@ def tif_bytes_to_pil_image(tif_bytes):

    return image

+def load_model():
+   """Load the SAM model and processor, restoring fine-tuned weights from the checkpoint."""
+   # Load the model configuration
+   model_config = SamConfig.from_pretrained("facebook/sam-vit-base")
+   processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
+
+   # Create an instance of the model architecture with the loaded configuration
+   model = SamModel(config=model_config)
+   # Update the model by loading the weights from the saved file
+   model_state_dict = torch.load(str(dir / "checkpoint.pth"), map_location=torch.device('cpu'))
+   model.load_state_dict(model_state_dict)
+
+   # Set the device to CUDA if available, otherwise use the CPU
+   device = "cuda" if torch.cuda.is_available() else "cpu"
+   model.to(device)
+
+   return model, processor
+
 def server(input: Inputs, output: Outputs, session: Session):
+
+   # Load the model and processor once
+   model, processor = load_model()
+
    @reactive.Calc
    def uploaded_image_path() -> str:
        """Returns the path to the uploaded image"""
@@ -88,34 +111,75 @@ def server(input: Inputs, output: Outputs, session: Session):
            return img
        else:
            return None  # Return None if no image is uploaded
-
+
+   @reactive.Calc
+   def generate_input_points():
+       """
+       input_points (torch.FloatTensor of shape (batch_size, num_points, 2)):
+       input 2D spatial points, used by the prompt encoder to encode the prompt;
+       generally yields much better results. The points can be obtained by passing a
+       list of list of lists to the processor, which will create corresponding torch
+       tensors of dimension 4. The first dimension is the image batch size, the second
+       dimension is the point batch size (i.e. how many segmentation masks the model
+       should predict per input point), the third dimension is the number of points per
+       segmentation mask (it is possible to pass multiple points for a single mask), and
+       the last dimension is the x (vertical) and y (horizontal) coordinates of the point.
+       If a different number of points is passed either for each image or for each mask,
+       the processor will create "PAD" points corresponding to the (0, 0) coordinate, and
+       the computation of the embedding will be skipped for these points using the labels.
+       """
+       # Define the size of the image array
+       array_size = 256
+
+       # Define the size of the grid
+       grid_size = 10
+
+       # Generate the grid points
+       x = np.linspace(0, array_size - 1, grid_size)
+       y = np.linspace(0, array_size - 1, grid_size)
+
+       # Generate a grid of coordinates
+       xv, yv = np.meshgrid(x, y)
+
+       # Convert the numpy arrays to lists
+       xv_list = xv.tolist()
+       yv_list = yv.tolist()
+
+       # Combine the x and y coordinates into a list of lists of lists
+       input_points = [[[int(x), int(y)] for x, y in zip(x_row, y_row)] for x_row, y_row in zip(xv_list, yv_list)]
+
+       # Reshape the n x n grid to the expected shape of the input_points tensor:
+       # (batch_size, point_batch_size, num_points_per_image, 2),
+       # where the last dimension of 2 holds the x and y coordinates of each point.
+       # batch_size: the number of images processed at once.
+       # point_batch_size: the number of point sets per image.
+       # num_points_per_image: the number of points in each set.
+       input_points = torch.tensor(input_points).view(1, 1, grid_size * grid_size, 2)
+
+       return input_points
+
    def process_image():
        """Processes the uploaded image, loads the model, and evaluates to get predictions"""
+
+       """ Get Image """
        # Load the uploaded image
        uploaded_image_bytes = input.tile_image()[0].read()

        # Convert the uploaded TIFF bytes to a PIL Image object
        uploaded_image = tif_bytes_to_pil_image(uploaded_image_bytes)

-       # Perform any preprocessing steps on the image as needed
-
-       # Example: Convert the image to the required input format for the model
-       # image_array = preprocess_image(uploaded_image)
-
-       # Load the model configuration
-       model_config = SamConfig.from_pretrained("facebook/sam-vit-base")
-       processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
+       """ Prepare Inputs """
+       # Get the input points prompt (a grid of points)
+       input_points = generate_input_points()

-       # Create an instance of the model architecture with the loaded configuration
-       model = SamModel(config=model_config)
-       # Update the model by loading the weights from the saved file
-       model_state_dict = torch.load(str(dir / "checkpoint.pth"), map_location=torch.device('cpu'))
-       model.load_state_dict(model_state_dict)
-
-       # set the device to cuda if available, otherwise use cpu
-       device = "cuda" if torch.cuda.is_available() else "cpu"
-       model.to(device)
+       # Prepare the image and prompt for the model
+       inputs = processor(uploaded_image, input_points=input_points, return_tensors="pt")

+       # Remove the batch dimension which the processor adds by default
+       inputs = {k: v.squeeze(0) for k, v in inputs.items()}
+
+       """ Get Predictions """
        # Evaluate the image with the model
        # Example: predictions = model.predict(image_array)
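Note: the new ui.output_text("processed_output") slot needs a matching renderer registered inside server(), and none appears in the hunks above. A minimal sketch of what that renderer could look like, assuming render is imported from shiny and that process_image() is extended to return something printable (both are assumptions, not part of this commit):

    @render.text
    def processed_output() -> str:
        # Hypothetical renderer: the function name must match the output id.
        # Assumes process_image() returns a printable result.
        if not input.tile_image():
            return "Upload a TIFF tile to run the model."
        return f"Model output: {process_image()}"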
 
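The body of tif_bytes_to_pil_image() is elided above (only its return statement survives as diff context). A plausible reconstruction, assuming it simply decodes the bytes with Pillow through an in-memory buffer:

import io
from PIL import Image

def tif_bytes_to_pil_image(tif_bytes):
    """Decode raw TIFF bytes into a PIL Image (assumed implementation)."""
    image = Image.open(io.BytesIO(tif_bytes))
    # Convert to RGB so downstream processing sees 3 channels (assumption)
    image = image.convert("RGB")
    return image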
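load_model() reads its weights from dir / "checkpoint.pth", so dir is presumably a pathlib.Path bound elsewhere in app.py (it shadows the dir builtin, which works but is easy to trip over). A sketch of the kind of module-level definition this implies, with the name and location purely hypothetical:

from pathlib import Path

# Hypothetical definition; the real app.py must bind `dir` to the folder
# that contains checkpoint.pth for load_model() to succeed.
dir = Path(__file__).parent

Also worth noting: server() runs once per user session in Shiny, so calling load_model() there reloads the weights for every new connection; hoisting the call to module scope would load them once per process.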
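As a sanity check on the grid prompt built by generate_input_points(), the same logic run outside the reactive context should produce one batch containing a single set of 100 points spanning the 256 x 256 tile:

import numpy as np
import torch

array_size, grid_size = 256, 10
x = np.linspace(0, array_size - 1, grid_size)
y = np.linspace(0, array_size - 1, grid_size)
xv, yv = np.meshgrid(x, y)

points = [[[int(px), int(py)] for px, py in zip(x_row, y_row)]
          for x_row, y_row in zip(xv.tolist(), yv.tolist())]
input_points = torch.tensor(points).view(1, 1, grid_size * grid_size, 2)

print(input_points.shape)      # torch.Size([1, 1, 100, 2])
print(input_points[0, 0, 0])   # tensor([0, 0]), the first grid point
print(input_points[0, 0, -1])  # tensor([255, 255]), the last grid point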
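The """ Get Predictions """ step is still a placeholder at the end of this commit. A sketch of how it might be completed inside process_image() with transformers' SamModel, assuming the batch dimension stripped above is restored before the forward pass; the sigmoid-and-threshold post-processing follows the usual SAM fine-tuning recipe, and mask_prob / binary_mask are hypothetical names:

        # Restore the batch dimension and move tensors to the model's device
        pixel_values = inputs["pixel_values"].unsqueeze(0).to(model.device)
        input_points = inputs["input_points"].unsqueeze(0).to(model.device)

        model.eval()
        with torch.no_grad():
            # multimask_output=False requests a single mask per point prompt
            outputs = model(pixel_values=pixel_values,
                            input_points=input_points,
                            multimask_output=False)

        # Turn the predicted mask logits into probabilities, then a binary mask
        mask_prob = torch.sigmoid(outputs.pred_masks.squeeze(1))
        binary_mask = (mask_prob > 0.5).squeeze().cpu().numpy().astype(np.uint8)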