Spaces:

aagoluoglu
/

SAMSidewalksDemo

Runtime error

App Files Files Community

aagoluoglu committed on Apr 30, 2024

Commit

bd2df77

1 Parent(s): 6d6e3fa

Update app.py

Browse files

Files changed (1) hide show

app.py +97 -95

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ app_ui = ui.page_fillable(
             ui.input_switch("by_species", "Show species", value=True),
             ui.input_switch("show_margins", "Show marginal plots", value=True),
         ),
-        ui.output_image("uploaded_image"),  # display the uploaded sidewalk tile image
         ui.output_plot("prediction_plots", fill=True),
         ui.output_ui("value_boxes"),
         ui.output_plot("scatter", fill=True),
@@ -106,6 +106,90 @@ def show_mask(mask, ax, random_color=False):
     mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
     ax.imshow(mask_image)
 ### SERVER ###
 def server(input: Inputs, output: Outputs, session: Session):
@@ -121,100 +205,18 @@ def server(input: Inputs, output: Outputs, session: Session):
         else:
             return ""  # No image uploaded
-    @render.image
-    def uploaded_image():
-        """Displays the uploaded image"""
-        img_src = uploaded_image_path()
-        if img_src:
-            img: ImgData = {"src": str(img_src), "width": "200px"}
-            print("IMAGE", img)
-            return img
-        else:
-            return None  # Return an empty string if no image is uploaded
-    def generate_input_points(image, grid_size=10):
-        """
-        input_points (torch.FloatTensor of shape (batch_size, num_points, 2)) —
-        Input 2D spatial points, this is used by the prompt encoder to encode the prompt.
-        Generally yields to much better results. The points can be obtained by passing a
-        list of list of list to the processor that will create corresponding torch tensors
-        of dimension 4. The first dimension is the image batch size, the second dimension
-        is the point batch size (i.e. how many segmentation masks do we want the model to
-        predict per input point), the third dimension is the number of points per segmentation
-        mask (it is possible to pass multiple points for a single mask), and the last dimension
-        is the x (vertical) and y (horizontal) coordinates of the point. If a different number
-        of points is passed either for each image, or for each mask, the processor will create
-        “PAD” points that will correspond to the (0, 0) coordinate, and the computation of the
-        embedding will be skipped for these points using the labels.
-        """
-        # Get the dimensions of the image
-        array_size = max(image.width, image.height)
-        # Generate the grid points
-        x = np.linspace(0, array_size-1, grid_size)
-        y = np.linspace(0, array_size-1, grid_size)
-        # Generate a grid of coordinates
-        xv, yv = np.meshgrid(x, y)
-        # Convert the numpy arrays to lists
-        xv_list = xv.tolist()
-        yv_list = yv.tolist()
-        # Combine the x and y coordinates into a list of list of lists
-        input_points = [[[int(x), int(y)] for x, y in zip(x_row, y_row)] for x_row, y_row in zip(xv_list, yv_list)]
-        #We need to reshape our nxn grid to the expected shape of the input_points tensor
-        # (batch_size, point_batch_size, num_points_per_image, 2),
-        # where the last dimension of 2 represents the x and y coordinates of each point.
-        #batch_size: The number of images you're processing at once.
-        #point_batch_size: The number of point sets you have for each image.
-        #num_points_per_image: The number of points in each set.
-        input_points = torch.tensor(input_points).view(1, 1, grid_size*grid_size, 2)
-        return input_points
-    def process_image():
-        """Processes the uploaded image, loads the model, and evaluates to get predictions"""
-        """ Get Image """
-        img_src = uploaded_image_path()
-        # Read the image bytes from the file
-        with open(img_src, 'rb') as f:
-            image_bytes = f.read()
-        # Convert the image bytes to a PIL Image
-        image = bytes_to_pil_image(image_bytes)
-        """ Prepare Inputs """
-        # get input points prompt (grid of points)
-        input_points = generate_input_points(image)
-        # prepare image and prompt for the model
-        inputs = processor(image, input_points=input_points, return_tensors="pt")
-        # # remove batch dimension which the processor adds by default
-        # inputs = {k:v.squeeze(0) for k,v in inputs.items()}
-        # Move the input tensor to the GPU if it's not already there
-        inputs = {k: v.to(device) for k, v in inputs.items()}
-        """ Get Predictions """
-        # forward pass
-        with torch.no_grad():
-            outputs = model(**inputs, multimask_output=False)
-        # apply sigmoid
-        prob = torch.sigmoid(outputs.pred_masks.squeeze(1))
-        # convert soft mask to hard mask
-        prob = prob.cpu().numpy().squeeze()
-        prediction = (prob > 0.5).astype(np.uint8)
-        # Return the processed result
-        return image, prob, prediction
     @reactive.Calc
     def get_predictions():

             ui.input_switch("by_species", "Show species", value=True),
             ui.input_switch("show_margins", "Show marginal plots", value=True),
         ),
+        #ui.output_image("uploaded_image"),  # display the uploaded sidewalk tile image, for some reason doesn't work on all accepted files
         ui.output_plot("prediction_plots", fill=True),
         ui.output_ui("value_boxes"),
         ui.output_plot("scatter", fill=True),
     mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
     ax.imshow(mask_image)
def generate_input_points(image, grid_size=10):
    """Build a regular grid of point prompts that covers ``image``.

    The grid has ``grid_size`` evenly spaced columns across the image width
    and ``grid_size`` evenly spaced rows across the image height, so every
    point lies inside the image even when the image is not square.  (For a
    square image this is the same grid as sampling both axes over the
    longer side.)

    Args:
        image: Object exposing ``width`` and ``height`` attributes in pixels
            (for example a PIL image).
        grid_size: Number of grid points along each axis.

    Returns:
        An integer ``torch`` tensor of shape
        ``(1, 1, grid_size * grid_size, 2)``.  The last dimension holds
        ``(x, y)`` pixel coordinates, where ``x`` is sampled along the width
        and ``y`` along the height.  Points are ordered row by row
        (``x`` varies fastest).
    """
    # Sample each axis over its own extent; using max(width, height) for both
    # would place points outside the image along the shorter side.
    xs = np.linspace(0, image.width - 1, grid_size)
    ys = np.linspace(0, image.height - 1, grid_size)

    # meshgrid's default 'xy' indexing gives arrays of shape
    # (grid_size, grid_size) with x varying along columns and y along rows.
    xv, yv = np.meshgrid(xs, ys)

    # Pair the coordinates up as (x, y) and truncate to whole pixels.
    points = np.stack([xv, yv], axis=-1).astype(np.int64)

    # Flatten the n x n grid into a single point set for a single image:
    # (batch_size, point_batch_size, num_points_per_image, 2).
    return torch.tensor(points).view(1, 1, grid_size * grid_size, 2)
def process_image(img_src=None):
    """Run the segmentation model on an image file and return its mask.

    Args:
        img_src: Path of the image file to process.  When omitted, the path
            is obtained from ``uploaded_image_path()``, which keeps the
            original zero-argument call working.  Passing the path
            explicitly lets a caller that owns the upload state hand it in
            directly.

    Returns:
        A tuple ``(image, prob, prediction)``:
            * ``image`` - the object returned by ``bytes_to_pil_image``
              (presumably a PIL image; confirm against that helper).
            * ``prob`` - sigmoid of the model's ``pred_masks`` as a NumPy
              array with singleton dimensions squeezed out.
            * ``prediction`` - ``prob`` thresholded at 0.5, as ``uint8``.

    Raises:
        FileNotFoundError: If the resolved path is empty (no image uploaded)
            or the file cannot be found.
    """
    if img_src is None:
        # NOTE(review): `uploaded_image_path` is not defined in this block.
        # The surrounding code suggests it is defined inside `server()`; if
        # so, this module-level lookup raises NameError and callers should
        # pass `img_src` instead - confirm.
        img_src = uploaded_image_path()
    if not img_src:
        # An empty path means "no upload"; fail with a clear message rather
        # than the opaque error open('') would produce.
        raise FileNotFoundError("No image uploaded: there is no file to process")

    # Read the file and decode it into an image object.
    with open(img_src, 'rb') as f:
        image = bytes_to_pil_image(f.read())

    # Prompt the model with a regular grid of points over the image.
    input_points = generate_input_points(image)
    inputs = processor(image, input_points=input_points, return_tensors="pt")
    # Move every input tensor to `device`.
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Forward pass; gradients are not needed for inference.
    with torch.no_grad():
        outputs = model(**inputs, multimask_output=False)

    # Logits -> probabilities (soft mask), then threshold into a hard mask.
    prob = torch.sigmoid(outputs.pred_masks.squeeze(1))
    prob = prob.cpu().numpy().squeeze()
    prediction = (prob > 0.5).astype(np.uint8)
    return image, prob, prediction
 ### SERVER ###
 def server(input: Inputs, output: Outputs, session: Session):
         else:
             return ""  # No image uploaded
+    # NOTE: the image preview below is disabled because it fails on some accepted uploads:
+    # it renders one screenshot that was converted to .tif but not another; cause not yet identified.
+    # @render.image
+    # def uploaded_image():
+    #     """Displays the uploaded image"""
+    #     img_src = uploaded_image_path()
+    #     if img_src:
+    #         img: ImgData = {"src": str(img_src), "width": "200px"}
+    #         print("IMAGE", img)
+    #         return img
+    #     else:
+    #         return None  # Return an empty string if no image is uploaded
     @reactive.Calc
     def get_predictions():