Filipstrozik committed
Commit afc2161 · 1 Parent(s): 2c3fe3e

Add initial implementation of EllipseRCNN model and dataset utilities


- Introduced core model and loss functions for ellipse detection.
- Added dataset classes for loading and processing crater images.
- Included type definitions for better code clarity.
- Created requirements file for necessary dependencies.
- Added README documentation for core functionalities.

app.py ADDED
@@ -0,0 +1,225 @@
1
+ import io
3
+ import gradio as gr
4
+ from PIL import Image
5
+ import matplotlib.pyplot as plt
6
+ import matplotlib.patches as mpatches
7
+ import torchvision.transforms as transforms
8
+ import torch
9
+ from huggingface_hub import hf_hub_download
10
+ from ellipse_rcnn import EllipseRCNN
11
+
12
+
13
+ # load model.pth from Filipstrozik/sat-tree-detection-v0 repository in hugging face
14
+ load_state_dict = torch.load(
15
+ hf_hub_download("Filipstrozik/sat-tree-detection-v0", "model.pth"),
16
+ weights_only=True,
17
+ )
18
+ model = EllipseRCNN()
19
+
20
+ model.load_state_dict(load_state_dict)
21
+ model.eval()
22
+
23
+
24
+ def conic_center(conic_matrix: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
25
+ """Returns center of ellipse in 2D cartesian coordinate system with numerical stability."""
26
+ # Extract the top-left 2x2 submatrix of the conic matrix
27
+ A = conic_matrix[..., :2, :2]
28
+
29
+ # Add stabilization for pseudoinverse computation by clamping singular values
30
+ A_pinv = torch.linalg.pinv(A, rcond=torch.finfo(A.dtype).eps)
31
+
32
+ # Extract the last two rows for the linear term
33
+ b = -conic_matrix[..., :2, 2][..., None]
34
+
35
+ # Stabilize any potential numerical instabilities
36
+ centers = torch.matmul(A_pinv, b).squeeze()
37
+
38
+ return centers[..., 0], centers[..., 1]
39
+
40
+
41
+ def ellipse_axes(conic_matrix: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
42
+ """Returns semi-major and semi-minor axes of ellipse in 2D cartesian coordinate system."""
43
+ lambdas = (
44
+ torch.linalg.eigvalsh(conic_matrix[..., :2, :2])
45
+ / (-torch.det(conic_matrix) / torch.det(conic_matrix[..., :2, :2]))[..., None]
46
+ )
47
+ axes = torch.sqrt(1 / lambdas)
48
+ return axes[..., 0], axes[..., 1]
49
+
50
+
51
+ def ellipse_angle(conic_matrix: torch.Tensor) -> torch.Tensor:
52
+ """Returns angle of ellipse in radians w.r.t. x-axis."""
53
+ return (
54
+ -torch.atan2(
55
+ 2 * conic_matrix[..., 1, 0],
56
+ conic_matrix[..., 1, 1] - conic_matrix[..., 0, 0],
57
+ )
58
+ / 2
59
+ )
60
+
61
+
62
+ def get_ellipse_params_from_matrices(ellipse_matrices):
63
+ if ellipse_matrices.shape[0] == 0:
64
+ return None
65
+ a, b = ellipse_axes(ellipse_matrices)
66
+ cx, cy = conic_center(ellipse_matrices)
67
+ theta = ellipse_angle(ellipse_matrices)
68
+
69
+ a = a.view(-1)
70
+ b = b.view(-1)
71
+ cx = cx.view(-1)
72
+ cy = cy.view(-1)
73
+ theta = theta.view(-1)
74
+
75
+ ellipses = torch.stack([a, b, cx, cy, theta], dim=1).reshape(-1, 5)
76
+ return ellipses
77
+
78
+
79
+ def plot_ellipses(
80
+ ellipse_params: torch.Tensor,
81
+ image: torch.Tensor,
82
+ plot_centers: bool = False,
83
+ rim_color: str = "r",
84
+ alpha: float = 0.25,
85
+ ) -> None:
86
+ if ellipse_params is None:
87
+ return
88
+ a, b, cx, cy, theta = ellipse_params.unbind(-1)
89
+
90
+ # scale the predicted centers up by a factor of 4 to match the displayed image resolution
91
+ cx = cx * 4
92
+ cy = cy * 4
93
+
94
+ # draw ellipses
95
+ for i in range(len(a)):
96
+ ellipse = mpatches.Ellipse(
97
+ (cx[i], cy[i]),
98
+ width=a[i],
99
+ height=b[i],
100
+ angle=torch.rad2deg(theta[i]),  # matplotlib Ellipse expects degrees; theta is in radians
101
+ fill=True,
102
+ alpha=alpha,
103
+ color=rim_color,
104
+ )
105
+ plt.gca().add_patch(ellipse)
106
+
107
+ if plot_centers:
108
+ plt.scatter(cx[i], cy[i], c=rim_color, s=10)
109
+
110
+ plt.imshow(image)
111
+
112
+
113
+ # Define the necessary transformations and the inverse normalization
114
+ def invert_normalization(image, mean, std):
115
+ for t, m, s in zip(image, mean, std):
116
+ t.mul_(s).add_(m)
117
+ return torch.clamp(image, 0, 1)
118
+
119
+
120
+ def process_image(image):
121
+ original_size = image.size
122
+
123
+ # Define the transform pipeline
124
+ transform = transforms.Compose(
125
+ [
126
+ transforms.Resize((1024, 1024)),
127
+ transforms.PILToTensor(),
128
+ transforms.ConvertImageDtype(torch.float),
129
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
130
+ ]
131
+ )
132
+
133
+ image_tensor = transform(image).unsqueeze(0) # Add batch dimension
134
+ return image_tensor, original_size
135
+
136
+
137
+ def generate_prediction(image, rpn_nms_thresh, score_thresh, nms_thresh):
138
+ # Preprocess image
139
+ image_tensor, original_size = process_image(image)
140
+ image_tensor = image_tensor.to("cpu")
141
+
142
+ # Ensure the model is in evaluation mode
143
+ model.rpn.nms_thresh = rpn_nms_thresh
144
+ model.roi_heads.score_thresh = score_thresh
145
+ model.roi_heads.nms_thresh = nms_thresh
146
+
147
+ with torch.no_grad():
148
+ prediction = model(image_tensor)[0]
149
+
150
+ # Invert normalization for display
151
+ mean = [0.485, 0.456, 0.406]
152
+ std = [0.229, 0.224, 0.225]
153
+ inverted_image = (
154
+ invert_normalization(image_tensor, mean, std)
155
+ .squeeze(0)
156
+ .permute(1, 2, 0)
157
+ .cpu()
158
+ .numpy()
159
+ )
160
+
161
+ # Plot results with ellipses
162
+ plt.figure(figsize=(10, 10))
163
+ plt.imshow(inverted_image)
164
+ plot_ellipses(
165
+ get_ellipse_params_from_matrices(prediction["ellipse_matrices"]),
166
+ inverted_image,
167
+ plot_centers=True,
168
+ rim_color="red",
169
+ alpha=0.25,
170
+ )
171
+ red_patch = mpatches.Patch(color="red", label="Predicted")
172
+ plt.legend(handles=[red_patch], loc="upper right")
173
+ plt.gca().set_aspect(original_size[0] / original_size[1])
174
+ plt.axis("off")
175
+ plt.tight_layout()
176
+ # Save the figure to a buffer and return as an image
177
+ buf = io.BytesIO()
178
+ plt.savefig(buf, format="png")
179
+ buf.seek(0)
180
+ with Image.open(buf) as output_image:
181
+ output_image = output_image.copy()
182
+ buf.close()
183
+ return output_image
184
+
185
+
186
+ # Define Gradio interface
187
+ with gr.Blocks() as demo:
188
+ gr.Markdown("## Tree Detection from Satellite Images")
189
+ gr.Markdown("Upload an image and see the detected trees with ellipses.")
190
+
191
+ with gr.Row():
192
+ image_input = gr.Image(label="Input Image", type="pil")
193
+ image_output = gr.Image(label="Detected Trees")
194
+
195
+ examples = [
196
+ ["examples/image1.jpg"],
197
+ ["examples/image2.jpg"],
198
+ ["examples/image3.jpg"],
199
+ ]
200
+
201
+ with gr.Row():
202
+ rpn_nms_slider = gr.Slider(
203
+ 0.0, 1.0, value=model.rpn.nms_thresh, label="RPN NMS Threshold"
204
+ )
205
+ score_thresh_slider = gr.Slider(
206
+ 0.0,
207
+ 1.0,
208
+ value=model.roi_heads.score_thresh,
209
+ label="ROI Heads Score Threshold",
210
+ )
211
+ nms_thresh_slider = gr.Slider(
212
+ 0.0, 1.0, value=model.roi_heads.nms_thresh, label="ROI Heads NMS Threshold"
213
+ )
214
+
215
+ submit_button = gr.Button("Detect Trees")
216
+ submit_button.click(
217
+ fn=generate_prediction,
218
+ inputs=[image_input, rpn_nms_slider, score_thresh_slider, nms_thresh_slider],
219
+ outputs=image_output,
220
+ )
221
+
222
+ gr.Examples(examples=examples, inputs=image_input, outputs=image_output)
223
+
224
+
225
+ demo.launch()
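For reference, a minimal sketch of calling the prediction pipeline defined above outside the Gradio UI. The image path is illustrative, and the threshold values simply mirror the model's defaults.

```python
from PIL import Image

# Assumes the definitions above (model, generate_prediction) are already in scope,
# e.g. in a notebook cell, and that one of the bundled example images is present.
image = Image.open("examples/image1.jpg").convert("RGB")

# Threshold values shown here mirror the EllipseRCNN defaults.
output = generate_prediction(image, rpn_nms_thresh=0.7, score_thresh=0.05, nms_thresh=0.5)
output.save("prediction.png")
```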
ellipse_rcnn/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .core.model import EllipseRCNN
ellipse_rcnn/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (223 Bytes).
 
ellipse_rcnn/core/README.md ADDED
@@ -0,0 +1,40 @@
1
+ # README.md
2
+
3
+ ## Symmetric Kullback-Leibler Divergence Loss
4
+
5
+ This directory provides an implementation of a **Symmetric Kullback-Leibler (KL) Divergence Loss** tailored for tensors
6
+ representing ellipses in matrix form. The loss function is designed to measure the difference between two elliptical
7
+ shapes and is particularly useful in optimization and generative modeling tasks.
8
+
9
+ ## Loss Calculation
10
+
11
+ ### **Kullback-Leibler Divergence**
12
+
13
+ For two ellipses represented by their matrix forms ( $A_1$ ) and ( $A_2$ ), the KL divergence is calculated as:
14
+ $$ D_{KL}(A_1 \parallel A_2) = \frac{1}{2} \left( \text{Tr}(C_2^{-1}C_1) + (\mu_1 - \mu_2)^T C_2^{-1} (\mu_1 - \mu_2) - 2 + \log\left(\frac{\det(C_2)}{\det(C_1)}\right) \right) $$
15
+ Where:
16
+
17
+ - ( $C_1$, $C_2$ ): Covariance matrices extracted from ( $A_1$, $A_2$ ).
18
+ - ( $\mu_1$, $\mu_2$ ): Centers (means) of the ellipses, computed from the conic representation.
19
+ - ( $\text{Tr}$ ): Trace operator.
20
+ - ( $C_2^{-1}$ ): Inverse of the covariance matrix of ( $A_2$ ).
21
+ - ( $\det(C_1)$, $\det(C_2)$ ): Determinants of covariance matrices.
22
+
23
+ A regularization term ( $\epsilon$ ) is added to ensure numerical stability when computing inverses and determinants.
24
+
25
+ ### **Symmetric KL Divergence**
26
+
27
+ The symmetric version of the KL divergence combines the calculations in both directions:
28
+ $$ D_{KL}^{\text{sym}}(A_1, A_2) = \frac{1}{2} \left( D_{KL}(A_1 \parallel A_2) + D_{KL}(A_2 \parallel A_1) \right) $$
29
+ This ensures a bidirectional comparison, making the function suitable as a loss metric in optimization tasks.
30
+
31
+ ## Features of the Loss
32
+
33
+ - **Shape-Only Comparison**: Option to ignore translation and compute divergence based purely on the shapes (covariance
34
+ matrices).
35
+ - **NaN Handling**: Replaces NaN values with a specified constant, ensuring robust loss evaluation.
36
+ - **Normalization**: An optional normalization step that rescales the divergence for certain applications.
37
+
38
+ ### Usage
39
+
40
+ The loss is encapsulated in the `SymmetricKLDLoss` class, which integrates seamlessly into PyTorch-based workflows.
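A minimal usage sketch, assuming ellipses are first converted to their 3×3 conic matrices with `ellipse_to_conic_matrix` from `ellipse_rcnn.utils.conics` (the ellipse parameters below are illustrative):

```python
import torch

from ellipse_rcnn.core.kld import SymmetricKLDLoss
from ellipse_rcnn.utils.conics import ellipse_to_conic_matrix

# Two batches of ellipses parameterised as (a, b, x, y, theta).
a = torch.tensor([10.0, 8.0])
b = torch.tensor([5.0, 4.0])
x = torch.tensor([32.0, 40.0])
y = torch.tensor([32.0, 28.0])
theta = torch.tensor([0.1, -0.3])

A1 = ellipse_to_conic_matrix(a=a, b=b, x=x, y=y, theta=theta)              # "predicted"
A2 = ellipse_to_conic_matrix(a=a * 1.1, b=b, x=x + 2.0, y=y, theta=theta)  # "target"

loss_fn = SymmetricKLDLoss(shape_only=False, nan_to_num=10.0, normalize=False)
loss = loss_fn(A1, A2).mean()  # scalar, suitable as a training loss term
```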
ellipse_rcnn/core/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .model import EllipseRCNN # noqa: F401
ellipse_rcnn/core/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (223 Bytes).
 
ellipse_rcnn/core/__pycache__/ellipse_roi_head.cpython-312.pyc ADDED
Binary file (19.2 kB).
 
ellipse_rcnn/core/__pycache__/kld.cpython-312.pyc ADDED
Binary file (5.72 kB).
 
ellipse_rcnn/core/__pycache__/model.cpython-312.pyc ADDED
Binary file (10.3 kB).
 
ellipse_rcnn/core/__pycache__/wd.cpython-312.pyc ADDED
Binary file (5.82 kB).
 
ellipse_rcnn/core/ellipse_roi_head.py ADDED
@@ -0,0 +1,429 @@
1
+ from typing import Dict, List, Tuple, Optional, TypedDict, NamedTuple, Self
2
+
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import functional as F
6
+ from torchvision.models.detection.roi_heads import RoIHeads, fastrcnn_loss
7
+
8
+ from .kld import SymmetricKLDLoss
9
+ from .wd import WassersteinLoss
10
+ from ..utils.conics import (
11
+ ellipse_to_conic_matrix,
12
+ ellipse_axes,
13
+ ellipse_angle,
14
+ conic_center,
15
+ )
16
+
17
+
18
+ class RegressorPrediction(NamedTuple):
19
+ """
20
+ Represents the processed outputs of a regression model as a named tuple.
21
+
22
+ This class encapsulates regression model outputs in a structured format, where
23
+ each attribute corresponds to a specific component of the regression output.
24
+ These outputs can be directly used for post-processing steps such as transformation
25
+ into conic matrices or further evaluations of ellipse geometry.
26
+
27
+ Attributes
28
+ ----------
29
+ d_a : torch.Tensor
30
+ The normalized semi-major axis scale factor (logarithmic) used to compute
31
+ the actual semi-major axis length of ellipses.
32
+ d_b : torch.Tensor
33
+ The normalized semi-minor axis scale factor (logarithmic) used to compute
34
+ the actual semi-minor axis length of ellipses.
41
+ d_theta : torch.Tensor
42
+ The normalized rotation angle factor which is processed to derive the
43
+ actual rotation angle (in radians) of ellipses.
44
+
45
+ Notes
46
+ -----
47
+ - The attributes `d_a` and `d_b`, representing scale factors for the semi-major
48
+ and semi-minor axes, are bounded between -1 and 1 by the final tanh activation.
49
+ - The ellipse centers are not regressed here; they are taken from the centers of
50
+ the matched bounding-box proposals during post-processing.
51
+ - The attribute `d_theta` is normalized to ensure the rotation angle lies within
52
+ a valid range (after transformation, typically between -π/2 and π/2 radians).
53
+ - These normalized outputs are post-processed together with bounding box information
54
+ to construct actionable ellipse parameters such as their axes lengths, centers,
55
+ and angles.
56
+ - This structure simplifies downstream regression tasks, such as conversion into
57
+ conic matrices or calculation of geometrical losses.
58
+ """
59
+
60
+ d_a: torch.Tensor
61
+ d_b: torch.Tensor
62
+ d_theta: torch.Tensor
63
+
64
+ @property
65
+ def device(self) -> torch.device:
66
+ return self.d_a.device
67
+
68
+ @property
69
+ def dtype(self) -> torch.dtype:
70
+ return self.d_a.dtype
71
+
72
+ def split(self, split_size: list[int] | int, dim: int = 0) -> list[Self]:
73
+ return [
74
+ RegressorPrediction(*tensors)
75
+ for tensors in zip(
76
+ *[torch.split(attr, split_size, dim=dim) for attr in self]
77
+ )
78
+ ]
79
+
80
+
81
+ class EllipseRegressor(nn.Module):
82
+ """
83
+ EllipseRegressor is a neural network module designed to predict parameters of
84
+ an ellipse given input features.
85
+
86
+ This class is a PyTorch module that uses a feedforward neural network to predict
87
+ the three normalized ellipse parameters: the semi-major axis scale `d_a`, the semi-minor
88
+ axis scale `d_b`, and the orientation offset `d_theta`. The network output is bounded by
89
+ a tanh activation, and the class uses Xavier weight initialization for improved
90
+ training stability and convergence.
91
+
92
+ Attributes
93
+ ----------
94
+ ffnn : nn.Sequential
95
+ A feedforward neural network with two hidden layers and ReLU activations.
96
+ """
97
+
98
+ def __init__(self, in_channels: int = 1024, hidden_size: int = 64):
99
+ super().__init__()
100
+ # Separate prediction heads for better gradient flow
101
+ self.ffnn = nn.Sequential(
102
+ nn.Linear(in_channels, hidden_size),
103
+ nn.ReLU(),
104
+ nn.Linear(hidden_size, 3),
105
+ nn.Tanh(),
106
+ )
107
+
108
+ # Initialize with small values
109
+ for lin in self.ffnn:
110
+ if isinstance(lin, nn.Linear):
111
+ nn.init.xavier_uniform_(lin.weight, gain=0.01)
112
+ nn.init.zeros_(lin.bias)
113
+
114
+ def forward(self, x: torch.Tensor) -> RegressorPrediction:
115
+ x = x.flatten(start_dim=1)
116
+ x = self.ffnn(x)
117
+
118
+ d_a, d_b, d_theta = x.unbind(dim=-1)
119
+
120
+ return RegressorPrediction(d_a=d_a, d_b=d_b, d_theta=d_theta)
121
+
122
+
123
+ def postprocess_ellipse_predictor(
124
+ pred: RegressorPrediction,
125
+ box_proposals: torch.Tensor,
126
+ ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
127
+ """Processes elliptical predictor outputs and converts them into conic matrices.
128
+
129
+ Parameters
130
+ ----------
131
+ pred : RegressorPrediction
132
+ The output of the elliptical predictor model.
133
+ box_proposals : torch.Tensor
134
+ Tensor containing proposed bounding box information, with shape (N, 4). Each box
135
+ is represented as a 4-tuple (x_min, y_min, x_max, y_max).
136
+
137
+ Returns
138
+ -------
139
+ tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]
140
+ A tuple containing:
141
+ - a (torch.Tensor): Computed semi-major axis of the ellipses.
142
+ - b (torch.Tensor): Computed semi-minor axis of the ellipses.
143
+ - x (torch.Tensor): X-coordinates of the ellipse centers.
144
+ - y (torch.Tensor): Y-coordinates of the ellipse centers.
145
+ - theta (torch.Tensor): Rotation angles (in radians) for the ellipses.
146
+
147
+ """
148
+ d_a, d_b, d_theta = pred
149
+
150
+ # Pre-compute box width, height, and diagonal
151
+ box_width = box_proposals[:, 2] - box_proposals[:, 0]
152
+ box_height = box_proposals[:, 3] - box_proposals[:, 1]
153
+ box_diag = torch.sqrt(box_width**2 + box_height**2)
154
+
155
+ a = box_diag * d_a.exp()
156
+ b = box_diag * d_b.exp()
157
+
158
+ box_x = box_proposals[:, 0] + box_width * 0.5
159
+ box_y = box_proposals[:, 1] + box_height * 0.5
160
+
161
+ theta = (d_theta * 2.0 - 1.0) * (torch.pi / 2)
162
+
163
+ cos_theta = torch.cos(theta)
164
+ sin_theta = torch.sin(theta)
165
+ theta = torch.where(
166
+ cos_theta >= 0,
167
+ torch.atan2(sin_theta, cos_theta),
168
+ torch.atan2(-sin_theta, -cos_theta),
169
+ )
170
+
171
+ return a, b, box_x, box_y, theta
172
+
173
+
174
+ class EllipseLossDict(TypedDict):
175
+ loss_ellipse_kld: torch.Tensor
176
+ loss_ellipse_smooth_l1: torch.Tensor
177
+ loss_ellipse_wasserstein: torch.Tensor
178
+
179
+
180
+ def ellipse_loss(
181
+ pred: RegressorPrediction,
182
+ A_target: List[torch.Tensor],
183
+ pos_matched_idxs: List[torch.Tensor],
184
+ box_proposals: List[torch.Tensor],
185
+ kld_loss_fn: SymmetricKLDLoss,
186
+ wd_loss_fn: WassersteinLoss,
187
+ ) -> EllipseLossDict:
188
+ pos_matched_idxs_batched = torch.cat(pos_matched_idxs, dim=0)
189
+ A_target = torch.cat(A_target, dim=0)[pos_matched_idxs_batched]
190
+
191
+ box_proposals = torch.cat(box_proposals, dim=0)
192
+
193
+ if A_target.numel() == 0:
194
+ return {
195
+ "loss_ellipse_kld": torch.tensor(0.0, device=pred.device, dtype=pred.dtype),
196
+ "loss_ellipse_smooth_l1": torch.tensor(
197
+ 0.0, device=pred.device, dtype=pred.dtype
198
+ ),
199
+ "loss_ellipse_wasserstein": torch.tensor(
200
+ 0.0, device=pred.device, dtype=pred.dtype
201
+ ),
202
+ }
203
+
204
+ a_target, b_target = ellipse_axes(A_target)
205
+ theta_target = ellipse_angle(A_target)
206
+
207
+ # Box proposal parameters
208
+ box_width = box_proposals[:, 2] - box_proposals[:, 0]
209
+ box_height = box_proposals[:, 3] - box_proposals[:, 1]
210
+ box_diag = torch.sqrt(box_width**2 + box_height**2).clamp(min=1e-6)
211
+
212
+ # Normalize target variables
213
+ da_target = (a_target / box_diag).log()
214
+ db_target = (b_target / box_diag).log()
215
+ dtheta_target = (theta_target / (torch.pi / 2) + 1) / 2
216
+
217
+ # Direct parameter losses
218
+ d_a, d_b, d_theta = pred
219
+
220
+ pred_t = torch.stack([d_a, d_b, d_theta], dim=1)
221
+ target_t = torch.stack([da_target, db_target, dtheta_target], dim=1)
222
+
223
+ loss_smooth_l1 = F.smooth_l1_loss(pred_t, target_t, beta=(1 / 9), reduction="sum")
224
+ loss_smooth_l1 /= box_proposals.shape[0]
225
+ loss_smooth_l1 = loss_smooth_l1.nan_to_num(nan=0.0).clip(max=float(1e4))
226
+
227
+ a, b, x, y, theta = postprocess_ellipse_predictor(pred, box_proposals)
228
+
229
+ A_pred = ellipse_to_conic_matrix(a=a, b=b, theta=theta, x=x, y=y)
230
+
231
+ loss_kld = kld_loss_fn.forward(A_pred, A_target).clip(max=float(1e4)).mean() * 0.1
232
+ loss_wd = torch.zeros(1, device=pred.device, dtype=pred.dtype)
233
+ # loss_wd = wd_loss_fn.forward(A_pred, A_target).clip(max=float(1e4)).mean() * 0.1
234
+
235
+ return {
236
+ "loss_ellipse_kld": loss_kld,
237
+ "loss_ellipse_smooth_l1": loss_smooth_l1,
238
+ "loss_ellipse_wasserstein": loss_wd,
239
+ }
240
+
241
+
242
+ class EllipseRoIHeads(RoIHeads):
243
+ def __init__(
244
+ self,
245
+ box_roi_pool: nn.Module,
246
+ box_head: nn.Module,
247
+ box_predictor: nn.Module,
248
+ fg_iou_thresh: float,
249
+ bg_iou_thresh: float,
250
+ batch_size_per_image: int,
251
+ positive_fraction: float,
252
+ bbox_reg_weights: Optional[Tuple[float, float, float, float]],
253
+ score_thresh: float,
254
+ nms_thresh: float,
255
+ detections_per_img: int,
256
+ ellipse_roi_pool: nn.Module,
257
+ ellipse_head: nn.Module,
258
+ ellipse_predictor: nn.Module,
259
+ # Loss parameters
260
+ kld_shape_only: bool = False,
261
+ kld_normalize: bool = False,
262
+ # Numerical stability parameters
263
+ nan_to_num: float = 10.0,
264
+ loss_scale: float = 1.0,
265
+ ):
266
+ super().__init__(
267
+ box_roi_pool,
268
+ box_head,
269
+ box_predictor,
270
+ fg_iou_thresh,
271
+ bg_iou_thresh,
272
+ batch_size_per_image,
273
+ positive_fraction,
274
+ bbox_reg_weights,
275
+ score_thresh,
276
+ nms_thresh,
277
+ detections_per_img,
278
+ )
279
+
280
+ self.ellipse_roi_pool = ellipse_roi_pool
281
+ self.ellipse_head = ellipse_head
282
+ self.ellipse_predictor = ellipse_predictor
283
+
284
+ self.kld_loss = SymmetricKLDLoss(
285
+ shape_only=kld_shape_only,
286
+ normalize=kld_normalize,
287
+ nan_to_num=nan_to_num,
288
+ )
289
+ self.wd_loss = WassersteinLoss(
290
+ nan_to_num=nan_to_num,
291
+ normalize=kld_normalize,
292
+ )
293
+ self.loss_scale = loss_scale
294
+
295
+ def has_ellipse_reg(self) -> bool:
296
+ if self.ellipse_roi_pool is None:
297
+ return False
298
+ if self.ellipse_head is None:
299
+ return False
300
+ if self.ellipse_predictor is None:
301
+ return False
302
+ return True
303
+
304
+ def postprocess_ellipse_regressions(self):
305
+ pass
306
+
307
+ def forward(
308
+ self,
309
+ features: Dict[str, torch.Tensor],
310
+ proposals: List[torch.Tensor],
311
+ image_shapes: List[Tuple[int, int]],
312
+ targets: Optional[List[Dict[str, torch.Tensor]]] = None,
313
+ ) -> Tuple[List[Dict[str, torch.Tensor]], Dict[str, torch.Tensor]]:
314
+ if targets is not None:
315
+ for t in targets:
316
+ floating_point_types = (torch.float, torch.double, torch.half)
317
+ if t["boxes"].dtype not in floating_point_types:
318
+ raise TypeError("target boxes must be of float type")
319
+ if t["ellipse_matrices"].dtype not in floating_point_types:
320
+ raise TypeError("target ellipse_offsets must be of float type")
321
+ if t["labels"].dtype != torch.int64:
322
+ raise TypeError("target labels must be of int64 type")
323
+
324
+ if self.training:
325
+ proposals, matched_idxs, labels, regression_targets = (
326
+ self.select_training_samples(proposals, targets)
327
+ )
328
+ else:
329
+ labels = None
330
+ regression_targets = None
331
+ matched_idxs = None
332
+
333
+ box_features = self.box_roi_pool(features, proposals, image_shapes)
334
+ box_features = self.box_head(box_features)
335
+ class_logits, box_regression = self.box_predictor(box_features)
336
+
337
+ result: List[Dict[str, torch.Tensor]] = []
338
+ losses = {}
339
+ if self.training:
340
+ if labels is None or regression_targets is None:
341
+ raise ValueError(
342
+ "Labels and regression targets must not be None during training"
343
+ )
344
+ loss_classifier, loss_box_reg = fastrcnn_loss(
345
+ class_logits, box_regression, labels, regression_targets
346
+ )
347
+ losses = {"loss_classifier": loss_classifier, "loss_box_reg": loss_box_reg}
348
+ else:
349
+ boxes, scores, labels = self.postprocess_detections(
350
+ class_logits, box_regression, proposals, image_shapes
351
+ )
352
+ num_images = len(boxes)
353
+ for i in range(num_images):
354
+ result.append(
355
+ {
356
+ "boxes": boxes[i],
357
+ "labels": labels[i],
358
+ "scores": scores[i],
359
+ }
360
+ )
361
+
362
+ if self.has_ellipse_reg():
363
+ ellipse_box_proposals = [p["boxes"] for p in result]
364
+ if self.training:
365
+ if matched_idxs is None:
366
+ raise ValueError("matched_idxs must not be None during training")
367
+ # during training, only focus on positive boxes
368
+ num_images = len(proposals)
369
+ ellipse_box_proposals = []
370
+ pos_matched_idxs = []
371
+ for img_id in range(num_images):
372
+ pos = torch.where(labels[img_id] > 0)[0]
373
+ ellipse_box_proposals.append(proposals[img_id][pos])
374
+ pos_matched_idxs.append(matched_idxs[img_id][pos])
375
+ else:
376
+ pos_matched_idxs = None # type: ignore
377
+
378
+ if self.ellipse_roi_pool is not None:
379
+ ellipse_features = self.ellipse_roi_pool(
380
+ features, ellipse_box_proposals, image_shapes
381
+ )
382
+ ellipse_features = self.ellipse_head(ellipse_features)
383
+ ellipse_shapes_normalised = self.ellipse_predictor(ellipse_features)
384
+ else:
385
+ raise Exception("Expected ellipse_roi_pool to be not None")
386
+
387
+ loss_ellipse_regressor = {}
388
+ if self.training:
389
+ if targets is None:
390
+ raise ValueError("Targets must not be None during training")
391
+ if pos_matched_idxs is None:
392
+ raise ValueError(
393
+ "pos_matched_idxs must not be None during training"
394
+ )
395
+ if ellipse_shapes_normalised is None:
396
+ raise ValueError(
397
+ "ellipse_shapes_normalised must not be None during training"
398
+ )
399
+
400
+ ellipse_matrix_targets = [t["ellipse_matrices"] for t in targets]
401
+ rcnn_loss_ellipse = ellipse_loss(
402
+ ellipse_shapes_normalised,
403
+ ellipse_matrix_targets,
404
+ pos_matched_idxs,
405
+ ellipse_box_proposals,
406
+ self.kld_loss,
407
+ self.wd_loss,
408
+ )
409
+
410
+ if self.loss_scale != 1.0:
411
+ rcnn_loss_ellipse["loss_ellipse_kld"] *= self.loss_scale
412
+ rcnn_loss_ellipse["loss_ellipse_smooth_l1"] *= self.loss_scale
413
+
414
+ loss_ellipse_regressor.update(rcnn_loss_ellipse)
415
+ else:
416
+ ellipses_per_image = [lbl.shape[0] for lbl in labels]
417
+ for pred, r, box in zip(
418
+ ellipse_shapes_normalised.split(ellipses_per_image, dim=0),
419
+ result,
420
+ ellipse_box_proposals,
421
+ ):
422
+ a, b, x, y, theta = postprocess_ellipse_predictor(pred, box)
423
+ A_pred = ellipse_to_conic_matrix(a=a, b=b, theta=theta, x=x, y=y)
424
+ r["ellipse_matrices"] = A_pred
425
+ # r["boxes"] = bbox_ellipse(A_pred)
426
+
427
+ losses.update(loss_ellipse_regressor)
428
+
429
+ return result, losses
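A small sketch of how the regressor outputs map back to ellipse parameters through `postprocess_ellipse_predictor`; the proposal box and offset values below are made up for illustration.

```python
import torch

from ellipse_rcnn.core.ellipse_roi_head import RegressorPrediction, postprocess_ellipse_predictor

# One proposal box in (x_min, y_min, x_max, y_max) format and one set of normalized offsets.
boxes = torch.tensor([[10.0, 20.0, 60.0, 80.0]])
pred = RegressorPrediction(
    d_a=torch.tensor([-0.5]),     # semi-major axis: a = box_diag * exp(d_a)
    d_b=torch.tensor([-1.0]),     # semi-minor axis: b = box_diag * exp(d_b)
    d_theta=torch.tensor([0.6]),  # mapped to an angle within (-pi/2, pi/2)
)

a, b, x, y, theta = postprocess_ellipse_predictor(pred, boxes)
# x and y sit at the box centre (35.0, 50.0); a and b scale with the box diagonal.
```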
ellipse_rcnn/core/ga.py ADDED
@@ -0,0 +1,67 @@
1
+ import torch
2
+ from ellipse_rcnn.utils.conics import conic_center
3
+
4
+
5
+ def gaussian_angle_distance(A1: torch.Tensor, A2: torch.Tensor) -> torch.Tensor:
6
+ # Extract covariance matrices (negative of the top-left blocks)
7
+ cov1, cov2 = map(lambda arr: -arr[..., :2, :2], (A1, A2))
8
+
9
+ # Extract the means by computing conic centers
10
+ c1_x, c1_y = conic_center(A1)
11
+ c2_x, c2_y = conic_center(A2)
12
+
13
+ # Stack the conic centers into the appropriate shape for computation
14
+ m1 = torch.stack((c1_x, c1_y), dim=-1)[..., None]
15
+ m2 = torch.stack((c2_x, c2_y), dim=-1)[..., None]
16
+
17
+ # Compute determinants for covariance matrices
18
+ det_cov1 = torch.clamp(cov1.det(), min=torch.finfo(cov1.dtype).eps)
19
+ det_cov2 = torch.clamp(cov2.det(), min=torch.finfo(cov2.dtype).eps)
20
+ cov_sum = cov1 + cov2
21
+
22
+ # Determinant of sum (clamped for numerical stability)
23
+ det_cov_sum = torch.clamp(cov_sum.det(), min=torch.finfo(cov_sum.dtype).eps)
24
+
25
+ # Compute fractional term with stabilized determinants
26
+ frac_term = (4 * torch.sqrt(det_cov1 * det_cov2)) / det_cov_sum
27
+ # Stable computation of the exponential term
28
+ mean_diff = m1 - m2
29
+ cov_sum_inv = torch.linalg.solve(
30
+ cov_sum, torch.eye(cov_sum.size(-1), dtype=cov_sum.dtype, device=cov_sum.device)
31
+ )
32
+ exp_arg = -0.5 * mean_diff.transpose(-1, -2) @ cov1 @ cov_sum_inv @ cov2 @ mean_diff
33
+ exp_term = torch.exp(torch.clamp(exp_arg, min=-50, max=50)).squeeze()
34
+
35
+ angle_term = frac_term * exp_term
36
+
37
+ return torch.arccos(angle_term)
38
+
39
+
40
+ class GaussianAngleDistanceLoss(torch.nn.Module):
41
+ """
42
+ Computes the Gaussian Angle Distance loss between two tensors.
43
+
44
+ This class serves as a wrapper around the `gaussian_angle_distance` function,
45
+ providing a clean interface and ensuring numerical stability.
46
+
47
+ Attributes
48
+ ----------
49
+ normalize : bool
50
+ Whether to normalize the computed distance; accepted for interface parity with the other loss modules, though the current `forward` returns the unnormalized angular distance.
51
+ nan_to_num : float
52
+ The value to replace NaN entries in the computation with. Helps maintain numerical
53
+ stability in cases where the input tensors contain undefined or invalid values.
54
+ """
55
+
56
+ def __init__(self, normalize: bool = True, nan_to_num: float = 10.0):
57
+ super().__init__()
58
+ self.nan_to_num = nan_to_num
59
+
60
+ def forward(self, A1: torch.Tensor, A2: torch.Tensor) -> torch.Tensor:
61
+ # Calculate the Gaussian angle distance
62
+ distance = gaussian_angle_distance(A1, A2)
63
+
64
+ # Replace NaN values with a predefined constant for numerical stability
65
+ distance = torch.nan_to_num(distance, nan=self.nan_to_num)
66
+
67
+ return distance
ellipse_rcnn/core/kld.py ADDED
@@ -0,0 +1,124 @@
1
+ import torch
2
+
3
+ from ellipse_rcnn.utils.conics import conic_center
4
+
5
+
6
+ def mv_kullback_leibler_divergence(
7
+ A1: torch.Tensor,
8
+ A2: torch.Tensor,
9
+ *,
10
+ shape_only: bool = False,
11
+ ) -> torch.Tensor:
12
+ """
13
+ Compute multi-variate KL divergence between ellipses represented by their matrices.
14
+
15
+ Args:
16
+ A1, A2: Ellipse matrices of shape (..., 3, 3)
17
+ shape_only: If True, ignores displacement term
18
+ """
19
+
20
+ # Ensure that batch sizes are equal
21
+ if A1.shape[:-2] != A2.shape[:-2]:
22
+ raise ValueError(
23
+ f"Batch size mismatch: A1 has shape {A1.shape[:-2]}, A2 has shape {A2.shape[:-2]}"
24
+ )
25
+
26
+ # Extract the upper 2x2 blocks as covariance matrices
27
+ cov1 = A1[..., :2, :2]
28
+ cov2 = A2[..., :2, :2]
29
+
30
+ # Compute centers
31
+ m1 = torch.vstack(conic_center(A1)).T[..., None]
32
+ m2 = torch.vstack(conic_center(A2)).T[..., None]
33
+
34
+ # Compute inverse
35
+ try:
36
+ cov2_inv = torch.linalg.inv(cov2)
37
+ except RuntimeError:
38
+ cov2_inv = torch.linalg.pinv(cov2)
39
+
40
+ # Trace term
41
+ trace_term = (cov2_inv @ cov1).diagonal(dim2=-2, dim1=-1).sum(1)
42
+
43
+ # Log determinant term
44
+ det_cov1 = torch.det(cov1)
45
+ det_cov2 = torch.det(cov2)
46
+ log_term = torch.log(det_cov2 / det_cov1).nan_to_num(nan=0.0)
47
+
48
+ if shape_only:
49
+ displacement_term = 0
50
+ else:
51
+ # Mean difference term
52
+ displacement_term = (
53
+ ((m1 - m2).transpose(-1, -2) @ cov2_inv @ (m1 - m2)).squeeze().abs()
54
+ )
55
+
56
+ return 0.5 * (trace_term + displacement_term - 2 + log_term)
57
+
58
+
59
+ def symmetric_kl_divergence(
60
+ A1: torch.Tensor,
61
+ A2: torch.Tensor,
62
+ *,
63
+ shape_only: bool = False,
64
+ nan_to_num: float = float(1e4),
65
+ normalize: bool = False,
66
+ ) -> torch.Tensor:
67
+ """
68
+ Compute symmetric KL divergence between ellipses.
69
+ """
70
+ kl_12 = torch.nan_to_num(
71
+ mv_kullback_leibler_divergence(A1, A2, shape_only=shape_only), nan_to_num
72
+ )
73
+ kl_21 = torch.nan_to_num(
74
+ mv_kullback_leibler_divergence(A2, A1, shape_only=shape_only), nan_to_num
75
+ )
76
+ kl = (kl_12 + kl_21) / 2
77
+
78
+ if kl.lt(0).any():
79
+ raise ValueError("Negative KL divergence encountered.")
80
+
81
+ if normalize:
82
+ kl = 1 - torch.exp(-kl)
83
+ return kl
84
+
85
+
86
+ class SymmetricKLDLoss(torch.nn.Module):
87
+ """
88
+ Computes the symmetric Kullback-Leibler divergence (KLD) loss between two tensors.
89
+
90
+ SymmetricKLDLoss is used for measuring the divergence between two probability
91
+ distributions or tensors, which can be useful in tasks such as generative modeling
92
+ or optimization. The function allows for options such as normalizing the tensors or
93
+ focusing only on their shape for comparison. Additionally, it includes a feature
94
+ to handle NaN values by replacing them with a numeric constant.
95
+
96
+ Attributes
97
+ ----------
98
+ shape_only : bool
99
+ If True, computes the divergence based on the shape of the tensors only. This
100
+ can be used to evaluate similarity without considering magnitude differences.
101
+ nan_to_num : float
102
+ The value to replace NaN entries in the tensors with. Helps maintain numerical
103
+ stability in cases where the input tensors contain undefined or invalid values.
104
+ normalize : bool
105
+ If True, normalizes the tensors before computing the divergence. This is
106
+ typically used when the inputs are not already probability distributions.
107
+ """
108
+
109
+ def __init__(
110
+ self, shape_only: bool = True, nan_to_num: float = 10.0, normalize: bool = False
111
+ ):
112
+ super().__init__()
113
+ self.shape_only = shape_only
114
+ self.nan_to_num = nan_to_num
115
+ self.normalize = normalize
116
+
117
+ def forward(self, A1: torch.Tensor, A2: torch.Tensor) -> torch.Tensor:
118
+ return symmetric_kl_divergence(
119
+ A1,
120
+ A2,
121
+ shape_only=self.shape_only,
122
+ nan_to_num=self.nan_to_num,
123
+ normalize=self.normalize,
124
+ )
ellipse_rcnn/core/model.py ADDED
@@ -0,0 +1,282 @@
1
+ from types import NoneType
2
+ from typing import List, Tuple, Optional, Any
3
+
4
+ import pytorch_lightning as pl
5
+ import torch
6
+ from torch import nn
7
+ from torchvision.models import ResNet50_Weights, WeightsEnum
8
+ from torchvision.models.detection.anchor_utils import AnchorGenerator
9
+ from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
10
+ from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor # noqa: F
11
+ from torchvision.models.detection.generalized_rcnn import GeneralizedRCNN
12
+ from torchvision.models.detection.rpn import RPNHead, RegionProposalNetwork
13
+ from torchvision.models.detection.transform import GeneralizedRCNNTransform
14
+ from torchvision.ops import MultiScaleRoIAlign
15
+
16
+ from .ellipse_roi_head import EllipseRoIHeads, EllipseRegressor
17
+ from ..utils.types import CollatedBatchType
18
+
19
+
20
+ class EllipseRCNN(GeneralizedRCNN):
21
+ def __init__(
22
+ self,
23
+ num_classes: int = 2,
24
+ # transform parameters
25
+ backbone_name: str = "resnet50",
26
+ weights: WeightsEnum | str = ResNet50_Weights.IMAGENET1K_V1,
27
+ min_size: int = 256,
28
+ max_size: int = 512,
29
+ image_mean: Optional[List[float]] = None,
30
+ image_std: Optional[List[float]] = None,
31
+ # Region Proposal Network parameters
32
+ rpn_anchor_generator: Optional[nn.Module] = None,
33
+ rpn_head: Optional[nn.Module] = None,
34
+ rpn_pre_nms_top_n_train: int = 2000,
35
+ rpn_pre_nms_top_n_test: int = 1000,
36
+ rpn_post_nms_top_n_train: int = 2000,
37
+ rpn_post_nms_top_n_test: int = 1000,
38
+ rpn_nms_thresh: float = 0.7,
39
+ rpn_fg_iou_thresh: float = 0.7,
40
+ rpn_bg_iou_thresh: float = 0.3,
41
+ rpn_batch_size_per_image: int = 256,
42
+ rpn_positive_fraction: float = 0.5,
43
+ rpn_score_thresh: float = 0.0,
44
+ # Box parameters
45
+ box_roi_pool: Optional[nn.Module] = None,
46
+ box_head: Optional[nn.Module] = None,
47
+ box_predictor: Optional[nn.Module] = None,
48
+ box_score_thresh: float = 0.05,
49
+ box_nms_thresh: float = 0.5,
50
+ box_detections_per_img: int = 100,
51
+ box_fg_iou_thresh: float = 0.5,
52
+ box_bg_iou_thresh: float = 0.5,
53
+ box_batch_size_per_image: int = 512,
54
+ box_positive_fraction: float = 0.25,
55
+ bbox_reg_weights: Optional[Tuple[float, float, float, float]] = None,
56
+ # Ellipse regressor
57
+ ellipse_roi_pool: Optional[nn.Module] = None,
58
+ ellipse_head: Optional[nn.Module] = None,
59
+ ellipse_predictor: Optional[nn.Module] = None,
60
+ ellipse_loss_scale: float = 1.0,
61
+ ellipse_loss_normalize: bool = False,
62
+ ):
63
+ if backbone_name != "resnet50" and weights == ResNet50_Weights.IMAGENET1K_V1:
64
+ raise ValueError(
65
+ "If backbone_name is not resnet50, weights_enum must be specified"
66
+ )
67
+
68
+ backbone = resnet_fpn_backbone(
69
+ backbone_name=backbone_name, weights=weights, trainable_layers=5
70
+ )
71
+
72
+ if not hasattr(backbone, "out_channels"):
73
+ raise ValueError(
74
+ "backbone should contain an attribute out_channels "
75
+ "specifying the number of output channels (assumed to be the "
76
+ "same for all the levels)"
77
+ )
78
+
79
+ if not isinstance(rpn_anchor_generator, (AnchorGenerator, NoneType)):
80
+ raise TypeError(
81
+ "rpn_anchor_generator must be an instance of AnchorGenerator or None"
82
+ )
83
+
84
+ if not isinstance(box_roi_pool, (MultiScaleRoIAlign, NoneType)):
85
+ raise TypeError(
86
+ "box_roi_pool must be an instance of MultiScaleRoIAlign or None"
87
+ )
88
+
89
+ if num_classes is not None:
90
+ if box_predictor is not None:
91
+ raise ValueError(
92
+ "num_classes should be None when box_predictor is specified"
93
+ )
94
+ else:
95
+ if box_predictor is None:
96
+ raise ValueError(
97
+ "num_classes should not be None when box_predictor "
98
+ "is not specified"
99
+ )
100
+
101
+ out_channels = backbone.out_channels
102
+
103
+ if rpn_anchor_generator is None:
104
+ anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
105
+ aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
106
+ rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
107
+ if rpn_head is None:
108
+ rpn_head = RPNHead(
109
+ out_channels, rpn_anchor_generator.num_anchors_per_location()[0]
110
+ )
111
+
112
+ rpn_pre_nms_top_n = dict(
113
+ training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test
114
+ )
115
+ rpn_post_nms_top_n = dict(
116
+ training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test
117
+ )
118
+
119
+ rpn = RegionProposalNetwork(
120
+ rpn_anchor_generator,
121
+ rpn_head,
122
+ rpn_fg_iou_thresh,
123
+ rpn_bg_iou_thresh,
124
+ rpn_batch_size_per_image,
125
+ rpn_positive_fraction,
126
+ rpn_pre_nms_top_n,
127
+ rpn_post_nms_top_n,
128
+ rpn_nms_thresh,
129
+ score_thresh=rpn_score_thresh,
130
+ )
131
+
132
+ default_representation_size = 1024
133
+
134
+ if box_roi_pool is None:
135
+ box_roi_pool = MultiScaleRoIAlign(
136
+ featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2
137
+ )
138
+
139
+ if box_head is None:
140
+ resolution = box_roi_pool.output_size[0]
141
+ if isinstance(resolution, int):
142
+ box_head = TwoMLPHead(
143
+ out_channels * resolution**2, default_representation_size
144
+ )
145
+ else:
146
+ raise ValueError(
147
+ "resolution should be an int but is {}".format(resolution)
148
+ )
149
+
150
+ if box_predictor is None:
151
+ box_predictor = FastRCNNPredictor(default_representation_size, num_classes)
152
+
153
+ if ellipse_roi_pool is None:
154
+ ellipse_roi_pool = MultiScaleRoIAlign(
155
+ featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2
156
+ )
157
+
158
+ resolution = box_roi_pool.output_size[0]
159
+ if ellipse_head is None:
160
+ if isinstance(resolution, int):
161
+ ellipse_head = TwoMLPHead(
162
+ out_channels * resolution**2, default_representation_size
163
+ )
164
+ else:
165
+ raise ValueError(
166
+ "resolution should be an int but is {}".format(resolution)
167
+ )
168
+
169
+ if ellipse_predictor is None:
170
+ ellipse_predictor = EllipseRegressor(
171
+ default_representation_size, num_classes
172
+ )
173
+
174
+ roi_heads = EllipseRoIHeads(
175
+ # Box
176
+ box_roi_pool,
177
+ box_head,
178
+ box_predictor,
179
+ box_fg_iou_thresh,
180
+ box_bg_iou_thresh,
181
+ box_batch_size_per_image,
182
+ box_positive_fraction,
183
+ bbox_reg_weights,
184
+ box_score_thresh,
185
+ box_nms_thresh,
186
+ box_detections_per_img,
187
+ # Ellipse
188
+ ellipse_roi_pool=ellipse_roi_pool,
189
+ ellipse_head=ellipse_head,
190
+ ellipse_predictor=ellipse_predictor,
191
+ loss_scale=ellipse_loss_scale,
192
+ kld_normalize=ellipse_loss_normalize,
193
+ )
194
+
195
+ if image_mean is None:
196
+ image_mean = [0.485, 0.456, 0.406]
197
+ if image_std is None:
198
+ image_std = [0.229, 0.224, 0.225]
199
+ transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
200
+
201
+ super().__init__(backbone, rpn, roi_heads, transform)
202
+
203
+
204
+ class EllipseRCNNLightning(pl.LightningModule):
205
+ def __init__(
206
+ self,
207
+ model: EllipseRCNN,
208
+ lr: float = 1e-4,
209
+ weight_decay: float = 1e-4,
210
+ ):
211
+ super().__init__()
212
+ self.model = model
213
+ self.save_hyperparameters(ignore=["model"])
214
+
215
+ def configure_optimizers(self) -> Any:
216
+ optimizer = torch.optim.AdamW(
217
+ self.model.parameters(),
218
+ lr=self.hparams.lr,
219
+ weight_decay=self.hparams.weight_decay,
220
+ amsgrad=True,
221
+ )
222
+ scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
223
+ optimizer, mode="min", factor=0.5, patience=2, min_lr=1e-6
224
+ )
225
+ return {
226
+ "optimizer": optimizer,
227
+ "lr_scheduler": {"scheduler": scheduler, "monitor": "val/loss_total"},
228
+ }
229
+
230
+ def training_step(
231
+ self, batch: CollatedBatchType, batch_idx: int = 0
232
+ ) -> torch.Tensor:
233
+ images, targets = batch
234
+ loss_dict = self.model(images, targets)
235
+ self.log_dict(
236
+ {f"train/{k}": v for k, v in loss_dict.items()},
237
+ prog_bar=True,
238
+ logger=True,
239
+ on_step=True,
240
+ )
241
+
242
+ loss = sum(loss_dict.values())
243
+ self.log("train/loss_total", loss, prog_bar=True, logger=True, on_step=True)
244
+
245
+ return loss
246
+
247
+ def validation_step(
248
+ self, batch: CollatedBatchType, batch_idx: int = 0
249
+ ) -> torch.Tensor:
250
+ self.train(True)
251
+ images, targets = batch
252
+
253
+ loss_dict = self.model(images, targets)
254
+
255
+ self.log_dict(
256
+ {f"val/{k}": v for k, v in loss_dict.items()},
257
+ logger=True,
258
+ on_step=False,
259
+ on_epoch=True,
260
+ )
261
+
262
+ val_loss = sum(loss_dict.values())
263
+ self.log(
264
+ "val/loss_total",
265
+ val_loss,
266
+ prog_bar=True,
267
+ logger=True,
268
+ on_step=False,
269
+ on_epoch=True,
270
+ )
271
+
272
+ self.log(
273
+ "hp_metric",
274
+ val_loss,
275
+ )
276
+
277
+ self.log(
278
+ "lr",
279
+ self.lr_schedulers().get_last_lr()[0],
280
+ )
281
+
282
+ return val_loss
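A minimal inference sketch, assuming the package layout from this commit. The input size is illustrative, and the ellipse head is randomly initialised here, so a trained checkpoint (as loaded in `app.py`) is needed for meaningful output.

```python
import torch

from ellipse_rcnn import EllipseRCNN

model = EllipseRCNN()  # resnet50-FPN backbone, 2 classes by default
model.eval()

# Inference takes a list of 3xHxW float tensors (normalization and resizing are handled internally).
images = [torch.rand(3, 256, 256)]
with torch.no_grad():
    predictions = model(images)

# Each prediction dict holds boxes, labels, scores and the regressed 3x3 conic
# matrices under the "ellipse_matrices" key.
print(predictions[0]["ellipse_matrices"].shape)
```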
ellipse_rcnn/core/wd.py ADDED
@@ -0,0 +1,128 @@
1
+ import torch
2
+
3
+ from ellipse_rcnn.utils.conics import conic_center
4
+
5
+
6
+ def wasserstein_distance(
7
+ A1: torch.Tensor,
8
+ A2: torch.Tensor,
9
+ *,
10
+ shape_only: bool = False,
11
+ ) -> torch.Tensor:
12
+ """
13
+ Compute the squared Wasserstein-2 distance between ellipses represented by their matrices.
14
+
15
+ Args:
16
+ A1, A2: Ellipse matrices of shape (..., 3, 3)
17
+ shape_only: If True, ignores displacement term
18
+
19
+ Returns:
20
+ Tensor containing Wasserstein distances
21
+ """
22
+ # Ensure batch sizes match
23
+ if A1.shape[:-2] != A2.shape[:-2]:
24
+ raise ValueError(
25
+ f"Batch size mismatch: A1 has shape {A1.shape[:-2]}, A2 has shape {A2.shape[:-2]}"
26
+ )
27
+
28
+ # Extract covariance matrices (upper 2x2 blocks)
29
+ cov1 = A1[..., :2, :2]
30
+ cov2 = A2[..., :2, :2]
31
+
32
+ if shape_only:
33
+ displacement_term = 0
34
+ else:
35
+ # Compute centers
36
+ m1 = torch.vstack(conic_center(A1)).T[..., None]
37
+ m2 = torch.vstack(conic_center(A2)).T[..., None]
38
+
39
+ # Mean difference term
40
+ displacement_term = torch.sum((m1 - m2) ** 2, dim=(1, 2))
41
+
42
+ # Compute the matrix square root term
43
+ eigenvalues1, eigenvectors1 = torch.linalg.eigh(cov1)
44
+ sqrt_eigenvalues1 = torch.sqrt(torch.clamp(eigenvalues1, min=1e-7))
45
+ sqrt_cov1 = (
46
+ eigenvectors1
47
+ @ torch.diag_embed(sqrt_eigenvalues1)
48
+ @ eigenvectors1.transpose(-2, -1)
49
+ )
50
+
51
+ inner_term = sqrt_cov1 @ cov2 @ sqrt_cov1
52
+ eigenvalues_inner, eigenvectors_inner = torch.linalg.eigh(inner_term)
53
+ sqrt_inner = (
54
+ eigenvectors_inner
55
+ @ torch.diag_embed(torch.sqrt(torch.clamp(eigenvalues_inner, min=1e-7)))
56
+ @ eigenvectors_inner.transpose(-2, -1)
57
+ )
58
+
59
+ trace_term = (
60
+ torch.diagonal(cov1, dim1=-2, dim2=-1).sum(-1)
61
+ + torch.diagonal(cov2, dim1=-2, dim2=-1).sum(-1)
62
+ - 2 * torch.diagonal(sqrt_inner, dim1=-2, dim2=-1).sum(-1)
63
+ )
64
+
65
+ return displacement_term + trace_term
66
+
67
+
68
+ def symmetric_wasserstein_distance(
69
+ A1: torch.Tensor,
70
+ A2: torch.Tensor,
71
+ *,
72
+ shape_only: bool = False,
73
+ nan_to_num: float = float(1e4),
74
+ normalize: bool = False,
75
+ ) -> torch.Tensor:
76
+ """
77
+ Compute symmetric Wasserstein distance between ellipses.
78
+
79
+ Args:
80
+ A1, A2: Ellipse matrices
81
+ shape_only: If True, ignores displacement term
82
+ nan_to_num: Value to replace NaN entries with
83
+ normalize: If True, normalizes the output to [0, 1]
84
+ """
85
+ w = torch.nan_to_num(
86
+ wasserstein_distance(A1, A2, shape_only=shape_only), nan=nan_to_num
87
+ )
88
+
89
+ if w.lt(0).any():
90
+ raise ValueError("Negative Wasserstein distance encountered.")
91
+
92
+ if normalize:
93
+ w = 1 - torch.exp(-w)
94
+ return w
95
+
96
+
97
+ class WassersteinLoss(torch.nn.Module):
98
+ """
99
+ Computes the Wasserstein distance loss between two ellipse tensors.
100
+
101
+ The Wasserstein distance provides a natural metric for comparing probability
102
+ distributions or shapes, with advantages over KL divergence such as:
103
+ - It's symmetric by definition
104
+ - It provides a true metric (satisfies triangle inequality)
105
+ - It's well-behaved even when distributions have different supports
106
+
107
+ Attributes:
108
+ shape_only: If True, computes distance based on shape without considering position
109
+ nan_to_num: Value to replace NaN entries with
110
+ normalize: If True, normalizes output to [0, 1] using exponential scaling
111
+ """
112
+
113
+ def __init__(
114
+ self, shape_only: bool = True, nan_to_num: float = 10.0, normalize: bool = False
115
+ ):
116
+ super().__init__()
117
+ self.shape_only = shape_only
118
+ self.nan_to_num = nan_to_num
119
+ self.normalize = normalize
120
+
121
+ def forward(self, A1: torch.Tensor, A2: torch.Tensor) -> torch.Tensor:
122
+ return symmetric_wasserstein_distance(
123
+ A1,
124
+ A2,
125
+ shape_only=self.shape_only,
126
+ nan_to_num=self.nan_to_num,
127
+ normalize=self.normalize,
128
+ )
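A brief sketch of the Wasserstein loss on conic matrices; the parameter values are illustrative, and `ellipse_to_conic_matrix` comes from `ellipse_rcnn.utils.conics`.

```python
import torch

from ellipse_rcnn.core.wd import WassersteinLoss
from ellipse_rcnn.utils.conics import ellipse_to_conic_matrix

A1 = ellipse_to_conic_matrix(
    a=torch.tensor([12.0, 9.0]), b=torch.tensor([6.0, 4.0]),
    x=torch.tensor([50.0, 20.0]), y=torch.tensor([50.0, 30.0]),
    theta=torch.tensor([0.2, -0.4]),
)
A2 = ellipse_to_conic_matrix(
    a=torch.tensor([11.0, 9.5]), b=torch.tensor([7.0, 4.5]),
    x=torch.tensor([52.0, 21.0]), y=torch.tensor([49.0, 29.0]),
    theta=torch.tensor([0.25, -0.35]),
)

# shape_only=True compares only the covariance structure and ignores the centre offset.
loss_fn = WassersteinLoss(shape_only=True, normalize=True)
print(loss_fn(A1, A2))  # per-ellipse distances, mapped to [0, 1) by normalize=True
```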
ellipse_rcnn/utils/__init__.py ADDED
File without changes
ellipse_rcnn/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (180 Bytes).
 
ellipse_rcnn/utils/__pycache__/conics.cpython-312.pyc ADDED
Binary file (8.37 kB).
 
ellipse_rcnn/utils/__pycache__/types.cpython-312.pyc ADDED
Binary file (2.73 kB).
 
ellipse_rcnn/utils/__pycache__/viz.cpython-312.pyc ADDED
Binary file (4.9 kB).
 
ellipse_rcnn/utils/conics.py ADDED
@@ -0,0 +1,209 @@
1
+ from typing import Literal
2
+
3
+ import torch
4
+
5
+
6
+ @torch.jit.script
7
+ def adjugate_matrix(matrix: torch.Tensor) -> torch.Tensor:
8
+ """Return adjugate matrix [1].
9
+
10
+ Parameters
11
+ ----------
12
+ matrix:
13
+ Input matrix
14
+
15
+ Returns
16
+ -------
17
+ torch.Tensor
18
+ Adjugate of input matrix
19
+
20
+ References
21
+ ----------
22
+ .. [1] https://en.wikipedia.org/wiki/Adjugate_matrix
23
+ """
24
+
25
+ cofactor = torch.inverse(matrix).T * torch.det(matrix)
26
+ return cofactor.T
27
+
28
+
29
+ # @torch.jit.script
30
+ def unimodular_matrix(matrix: torch.Tensor) -> torch.Tensor:
31
+ """Rescale matrix such that det(ellipses) = 1, in other words, make it unimodular. Doest not work with tensors
32
+ of dtype torch.float64.
33
+
34
+ Parameters
35
+ ----------
36
+ matrix:
37
+ Matrix input
38
+
39
+ Returns
40
+ -------
41
+ torch.Tensor
42
+ Unimodular version of input matrix.
43
+ """
44
+ val = 1.0 / torch.det(matrix)
45
+ return (torch.sign(val) * torch.pow(torch.abs(val), 1.0 / 3.0))[
46
+ ..., None, None
47
+ ] * matrix
48
+
49
+
50
+ # @torch.jit.script
51
+ def ellipse_to_conic_matrix(
52
+ *,
53
+ a: torch.Tensor,
54
+ b: torch.Tensor,
55
+ x: torch.Tensor | None = None,
56
+ y: torch.Tensor | None = None,
57
+ theta: torch.Tensor | None = None,
58
+ ) -> torch.Tensor:
59
+ r"""Returns matrix representation for crater derived from ellipse parameters such that _[1]:
60
+
61
+ | A = a²(sin θ)² + b²(cos θ)²
62
+ | B = 2(b² - a²) sin θ cos θ
63
+ | C = a²(cos θ)² + b²(sin θ)²
64
+ | D = -2Ax₀ - By₀
65
+ | E = -Bx₀ - 2Cy₀
66
+ | F = Ax₀² + Bx₀y₀ + Cy₀² - a²b²
67
+
68
+ Resulting in a conic matrix:
69
+ ::
70
+ |A B/2 D/2 |
71
+ M = |B/2 C E/2 |
72
+ |D/2 E/2 F |
73
+
74
+ Parameters
75
+ ----------
76
+ a:
77
+ Semi-Major ellipse axis
78
+ b:
79
+ Semi-Minor ellipse axis
80
+ theta:
81
+ Ellipse angle (radians)
82
+ x:
83
+ X-position in 2D cartesian coordinate system (coplanar)
84
+ y:
85
+ Y-position in 2D cartesian coordinate system (coplanar)
86
+
87
+ Returns
88
+ -------
89
+ torch.Tensor
90
+ Array of ellipse matrices
91
+
92
+ References
93
+ ----------
94
+ .. [1] https://www.researchgate.net/publication/355490899_Lunar_Crater_Identification_in_Digital_Images
95
+ """
96
+
97
+ x = x if x is not None else torch.zeros(1)
98
+ y = y if y is not None else torch.zeros(1)
99
+ theta = theta if theta is not None else torch.zeros(1)
100
+
101
+ sin_theta = torch.sin(theta)
102
+ cos_theta = torch.cos(theta)
103
+
104
+ a2 = a**2
105
+ b2 = b**2
106
+
107
+ A = a2 * sin_theta**2 + b2 * cos_theta**2
108
+ B = 2 * (b2 - a2) * sin_theta * cos_theta
109
+ C = a2 * cos_theta**2 + b2 * sin_theta**2
110
+ D = -2 * A * x - B * y
111
+ F = -B * x - 2 * C * y
112
+ G = A * (x**2) + B * x * y + C * (y**2) - a2 * b2
113
+
114
+ # Create (array of) of conic matrix (N, 3, 3)
115
+ conic_matrix = torch.stack(
116
+ tensors=(
117
+ torch.stack((A, B / 2, D / 2), dim=-1),
118
+ torch.stack((B / 2, C, F / 2), dim=-1),
119
+ torch.stack((D / 2, F / 2, G), dim=-1),
120
+ ),
121
+ dim=-1,
122
+ )
123
+
124
+ return conic_matrix.squeeze()
125
+
126
+
127
+ def conic_center(conic_matrix: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
128
+ """Returns center of ellipse in 2D cartesian coordinate system with numerical stability."""
129
+ # Extract the top-left 2x2 submatrix of the conic matrix
130
+ A = conic_matrix[..., :2, :2]
131
+
132
+ # Add stabilization for pseudoinverse computation by clamping singular values
133
+ A_pinv = torch.linalg.pinv(A, rcond=torch.finfo(A.dtype).eps)
134
+
135
+ # Extract the last two rows for the linear term
136
+ b = -conic_matrix[..., :2, 2][..., None]
137
+
138
+ # Stabilize any potential numerical instabilities
139
+ centers = torch.matmul(A_pinv, b).squeeze()
140
+
141
+ return centers[..., 0], centers[..., 1]
142
+
143
+
144
+ def ellipse_axes(conic_matrix: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
145
+ """Returns semi-major and semi-minor axes of ellipse in 2D cartesian coordinate system."""
146
+ lambdas = (
147
+ torch.linalg.eigvalsh(conic_matrix[..., :2, :2])
148
+ / (-torch.det(conic_matrix) / torch.det(conic_matrix[..., :2, :2]))[..., None]
149
+ )
150
+ axes = torch.sqrt(1 / lambdas)
151
+ return axes[..., 0], axes[..., 1]
152
+
153
+
154
+ def ellipse_angle(conic_matrix: torch.Tensor) -> torch.Tensor:
155
+ """Returns angle of ellipse in radians w.r.t. x-axis."""
156
+ return (
157
+ -torch.atan2(
158
+ 2 * conic_matrix[..., 1, 0],
159
+ conic_matrix[..., 1, 1] - conic_matrix[..., 0, 0],
160
+ )
161
+ / 2
162
+ )
163
+
164
+
165
+ def bbox_ellipse(
166
+ ellipses: torch.Tensor,
167
+ box_type: Literal["xyxy", "xywh", "cxcywh"] = "xyxy",
168
+ ) -> torch.Tensor:
169
+ """Converts (array of) ellipse matrices to bounding box tensor with format [xmin, ymin, xmax, ymax].
170
+
171
+ Parameters
172
+ ----------
173
+ ellipses:
174
+ Array of ellipse matrices
175
+ box_type:
176
+ Format of bounding boxes, default is "xyxy"
177
+
178
+ Returns
179
+ -------
180
+ Array of bounding boxes
181
+ """
182
+ cx, cy = conic_center(ellipses)
183
+ theta = ellipse_angle(ellipses)
184
+ semi_major_axis, semi_minor_axis = ellipse_axes(ellipses)
185
+
186
+ ux, uy = semi_major_axis * torch.cos(theta), semi_major_axis * torch.sin(theta)
187
+ vx, vy = (
188
+ semi_minor_axis * torch.cos(theta + torch.pi / 2),
189
+ semi_minor_axis * torch.sin(theta + torch.pi / 2),
190
+ )
191
+
192
+ box_halfwidth = torch.sqrt(ux**2 + vx**2)
193
+ box_halfheight = torch.sqrt(uy**2 + vy**2)
194
+
195
+ bboxes = torch.vstack(
196
+ (
197
+ cx - box_halfwidth,
198
+ cy - box_halfheight,
199
+ cx + box_halfwidth,
200
+ cy + box_halfheight,
201
+ )
202
+ ).T
203
+
204
+ if box_type != "xyxy":
205
+ from torchvision.ops import boxes as box_ops
206
+
207
+ bboxes = box_ops.box_convert(bboxes, in_fmt="xyxy", out_fmt=box_type)
208
+
209
+ return bboxes
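A round-trip sketch of the helpers above (parameter values are illustrative):

```python
import torch

from ellipse_rcnn.utils.conics import (
    bbox_ellipse,
    conic_center,
    ellipse_angle,
    ellipse_axes,
    ellipse_to_conic_matrix,
)

a = torch.tensor([20.0, 15.0])     # semi-major axes
b = torch.tensor([10.0, 5.0])      # semi-minor axes
x = torch.tensor([64.0, 100.0])    # centre x-coordinates
y = torch.tensor([64.0, 40.0])     # centre y-coordinates
theta = torch.tensor([0.3, -0.7])  # rotation angles in radians

A = ellipse_to_conic_matrix(a=a, b=b, x=x, y=y, theta=theta)  # shape (2, 3, 3)

cx, cy = conic_center(A)                  # cx ≈ [64., 100.], cy ≈ [64., 40.]
semi_major, semi_minor = ellipse_axes(A)  # ≈ a and b
angle = ellipse_angle(A)                  # ≈ theta
boxes = bbox_ellipse(A)                   # axis-aligned xyxy boxes enclosing each ellipse
```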
ellipse_rcnn/utils/data/__init__.py ADDED
File without changes
ellipse_rcnn/utils/data/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (185 Bytes).
 
ellipse_rcnn/utils/data/__pycache__/base.cpython-312.pyc ADDED
Binary file (2.01 kB).
 
ellipse_rcnn/utils/data/base.py ADDED
@@ -0,0 +1,62 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Any
3
+
4
+ from torch.utils.data import Dataset
5
+
6
+ from ellipse_rcnn.utils.types import (
7
+ TargetDict,
8
+ CollatedBatchType,
9
+ UncollatedBatchType,
10
+ )
11
+
12
+
13
+ def collate_fn(batch: UncollatedBatchType) -> CollatedBatchType:
14
+ """
15
+ Collate function for the :class:`DataLoader`.
16
+
17
+ Parameters
18
+ ----------
19
+ batch:
20
+ A batch of data.
21
+ """
22
+ return tuple(zip(*batch)) # type: ignore
23
+
24
+
25
+ class EllipseDatasetBase(ABC, Dataset):
26
+ @abstractmethod
27
+ def load_image(self, index: int) -> Any:
28
+ """
29
+ Load the image for the given index.
30
+
31
+ Parameters
32
+ ----------
33
+ index:
34
+ The index of the image.
35
+
36
+ Returns
37
+ -------
38
+ image:
39
+ The raw image.
40
+ """
41
+ pass
42
+
43
+ @abstractmethod
44
+ def load_target_dict(self, index: int) -> TargetDict:
45
+ """
46
+ Load the target dict for the given index.
47
+
48
+ Parameters
49
+ ----------
50
+ index:
51
+ The index of the target dict.
52
+
53
+ Returns
54
+ -------
55
+ target_dict:
56
+ The target dictionary.
57
+ """
58
+ pass
59
+
60
+ @abstractmethod
61
+ def __len__(self) -> int:
62
+ pass
ellipse_rcnn/utils/data/craters.py ADDED
@@ -0,0 +1,54 @@
+ import h5py
+ import torch
+ from torch.utils.data import Dataset
+
+ from ellipse_rcnn.utils.types import TargetDict, ImageTargetTuple
+ from ellipse_rcnn.utils.conics import bbox_ellipse
+
+
+ class CraterEllipseDataset(Dataset):
+     """
+     Dataset for crater ellipse detection. Mostly meant as an example in combination with
+     https://github.com/wdoppenberg/crater-detection.
+     """
+
+     def __init__(self, file_path: str, group: str) -> None:
+         self.file_path = file_path
+         self.group = group
+
+     def __getitem__(self, idx: torch.Tensor) -> ImageTargetTuple:
+         with h5py.File(self.file_path, "r") as dataset:
+             image = torch.tensor(dataset[self.group]["images"][idx])
+
+             # The number of instances can vary, hence a separate array holds the
+             # start indices of each image's instances.
+             start_idx = dataset[self.group]["craters/crater_list_idx"][idx]
+             end_idx = dataset[self.group]["craters/crater_list_idx"][idx + 1]
+             ellipse_matrices = torch.tensor(
+                 dataset[self.group]["craters/A_craters"][start_idx:end_idx]
+             )
+
+             boxes = bbox_ellipse(ellipse_matrices)
+             area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
+
+             num_objs = len(boxes)
+
+             labels = torch.ones((num_objs,), dtype=torch.int64)
+             image_id = torch.tensor([idx])
+
+             iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
+
+             target = TargetDict(
+                 boxes=boxes,
+                 labels=labels,
+                 image_id=image_id,
+                 area=area,
+                 iscrowd=iscrowd,
+                 ellipse_matrices=ellipse_matrices,
+             )
+
+             return image, target
+
+     def __len__(self) -> int:
+         with h5py.File(self.file_path, "r") as f:
+             return len(f[self.group]["images"])
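A hypothetical read of a single sample. The file path and group name are placeholders; the HDF5 layout must match the crater-detection export referenced in the docstring above.

from ellipse_rcnn.utils.data.craters import CraterEllipseDataset

ds = CraterEllipseDataset(file_path="data/craters.h5", group="training")
image, target = ds[0]
print(image.shape, target["boxes"].shape, target["ellipse_matrices"].shape)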
ellipse_rcnn/utils/data/fddb.py ADDED
@@ -0,0 +1,239 @@
+ """
+ Data loader and module for the FDDB dataset.
+ https://vis-www.cs.umass.edu/fddb/
+ """
+
+ from glob import glob
+ from typing import Any
+ from pathlib import Path
+
+ import torch
+ import pandas as pd
+ import PIL.Image
+ import torchvision.transforms
+ import pytorch_lightning as pl
+ from torch.utils.data import DataLoader, random_split
+
+ from ellipse_rcnn.utils.types import TargetDict, ImageTargetTuple, EllipseTuple
+ from ellipse_rcnn.utils.conics import bbox_ellipse, ellipse_to_conic_matrix, conic_center, unimodular_matrix
+ from ellipse_rcnn.utils.data.base import EllipseDatasetBase, collate_fn
+
+
+ def preprocess_label_files(root_path: str) -> dict[str, list[EllipseTuple]]:
+     label_files = glob(f"{root_path}/labels/*.txt")
+
+     file_paths = []
+     ellipse_data = []
+
+     for filename in label_files:
+         with open(filename) as f:
+             if "ellipseList" not in filename:
+                 file_paths += [p.strip("\n") for p in f.readlines()]
+             else:
+                 ellipse_data += [p.strip("\n") for p in f.readlines()]
+
+     pdf_file_paths = pd.DataFrame({"path": file_paths})
+     pdf_file_paths["path_idx"] = pdf_file_paths.index
+
+     pdf_ellipse_data = pd.DataFrame({"data": ellipse_data})
+     pdf_ellipse_data["data_idx"] = pdf_ellipse_data.index
+
+     pdf_file_data_mapping = pdf_file_paths.merge(
+         pdf_ellipse_data, left_on="path", right_on="data", how="left"
+     )
+
+     ellipse_dict: dict[str, list[EllipseTuple]] = {
+         str(k): [] for k in pdf_file_paths["path"]
+     }
+
+     for i, r in pdf_file_data_mapping.iterrows():
+         data_idx = r["data_idx"]
+         num_ellipses = int(ellipse_data[data_idx + 1])
+         file_path = r["path"]
+         for j in range(data_idx + 2, data_idx + num_ellipses + 2):
+             a, b, theta, x, y = [
+                 float(v) for v in ellipse_data[j].split(" ")[:-1] if len(v) > 0
+             ]
+             ellipse_params = EllipseTuple(a, b, theta, x, y)
+             ellipse_dict[file_path].append(ellipse_params)
+
+     return ellipse_dict
+
+
+ class FDDB(EllipseDatasetBase):
+     def __init__(
+         self,
+         root_path: str | Path,
+         ellipse_dict: dict[str, list[EllipseTuple]] | None = None,
+         transform: Any = None,
+     ) -> None:
+         self.root_path = Path(root_path) if isinstance(root_path, str) else root_path
+         if transform is None:
+             self.transform = torchvision.transforms.Compose(
+                 [
+                     torchvision.transforms.ToTensor(),
+                     torchvision.transforms.Normalize(
+                         mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+                     ),
+                 ]
+             )
+         else:
+             self.transform = transform
+         self.ellipse_dict = ellipse_dict or preprocess_label_files(root_path)
+
+     def __len__(self) -> int:
+         return len(self.ellipse_dict)
+
+     def load_target_dict(self, index: int) -> TargetDict:
+         key = list(self.ellipse_dict.keys())[index]
+         ellipses_list = self.ellipse_dict[key]
+
+         a = torch.tensor([[e.a for e in ellipses_list]])
+         b = torch.tensor([[e.b for e in ellipses_list]])
+         theta = torch.tensor([[e.theta for e in ellipses_list]])
+         x = torch.tensor([[e.x for e in ellipses_list]])
+         y = torch.tensor([[e.y for e in ellipses_list]])
+
+         ellipse_matrices = ellipse_to_conic_matrix(a=a, b=b, x=x, y=y, theta=theta)
+
+         if torch.stack(conic_center(ellipse_matrices)).isnan().any():
+             raise ValueError("NaN values in ellipse matrices. Please check the data.")
+
+         if len(ellipse_matrices.shape) == 2:
+             ellipse_matrices = ellipse_matrices.unsqueeze(0)
+
+         boxes = bbox_ellipse(ellipse_matrices, box_type="xyxy")
+
+         num_objs = len(boxes)
+
+         labels = torch.ones((num_objs,), dtype=torch.int64)
+         image_id = torch.tensor([index])
+         area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
+         iscrowd = torch.zeros((num_objs,), dtype=torch.int64)
+
+         target = TargetDict(
+             boxes=boxes,
+             labels=labels,
+             image_id=image_id,
+             area=area,
+             iscrowd=iscrowd,
+             ellipse_matrices=ellipse_matrices,
+         )
+
+         return target
+
+     def load_image(self, index: int) -> PIL.Image.Image:
+         key = list(self.ellipse_dict.keys())[index]
+         file_path = str(Path(self.root_path) / "images" / Path(key)) + ".jpg"
+         return PIL.Image.open(file_path)
+
+     def __getitem__(self, idx: int) -> ImageTargetTuple:
+         image = self.load_image(idx)
+         target_dict = self.load_target_dict(idx)
+
+         # If the image is grayscale, convert it to RGB
+         if image.mode == "L":
+             image = image.convert("RGB")
+
+         image = self.transform(image)
+
+         return image, target_dict
+
+     def __repr__(self) -> str:
+         return f"FDDB<img={len(self)}>"
+
+     def split(self, fraction: float, shuffle: bool = False) -> tuple["FDDB", "FDDB"]:
+         """
+         Splits the dataset into two subsets based on the given fraction.
+
+         Args:
+             fraction (float): Fraction of the dataset for the first subset (0 < fraction < 1).
+             shuffle (bool): If True, dataset keys will be shuffled before splitting.
+
+         Returns:
+             tuple[FDDB, FDDB]: Two FDDB instances, one with the fraction of data,
+                 and the other with the remaining data.
+         """
+         if not (0 < fraction < 1):
+             raise ValueError("The fraction must be between 0 and 1.")
+
+         keys = list(self.ellipse_dict.keys())
+         if shuffle:
+             import random
+
+             random.shuffle(keys)
+
+         total_length = len(keys)
+         split_index = int(total_length * fraction)
+
+         subset1_keys = keys[:split_index]
+         subset2_keys = keys[split_index:]
+
+         subset1_ellipse_dict = {key: self.ellipse_dict[key] for key in subset1_keys}
+         subset2_ellipse_dict = {key: self.ellipse_dict[key] for key in subset2_keys}
+
+         subset1 = FDDB(
+             self.root_path, ellipse_dict=subset1_ellipse_dict, transform=self.transform
+         )
+         subset2 = FDDB(
+             self.root_path, ellipse_dict=subset2_ellipse_dict, transform=self.transform
+         )
+
+         return subset1, subset2
+
+
+ class FDDBLightningDataModule(pl.LightningDataModule):
+     def __init__(
+         self,
+         data_dir: str,
+         batch_size: int = 16,
+         train_fraction: float = 0.8,
+         transform: Any = None,
+         num_workers: int = 0,
+     ) -> None:
+         super().__init__()
+         self.data_dir = data_dir
+         self.batch_size = batch_size
+         self.train_fraction = train_fraction
+         self.transform = transform
+         self.dataset: FDDB | None = None
+         self.train_dataset = None
+         self.val_dataset = None
+         self.num_workers = num_workers
+
+     def prepare_data(self) -> None:
+         # Ensure data preparation or downloading is done here.
+         pass
+
+     def setup(self, stage: str | None = None) -> None:
+         # Instantiate the FDDB dataset and split it into training and validation subsets.
+         self.dataset = FDDB(self.data_dir, transform=self.transform)
+
+         train_size = int(len(self.dataset) * self.train_fraction)
+         val_size = len(self.dataset) - train_size
+         self.train_dataset, self.val_dataset = random_split(
+             self.dataset, [train_size, val_size]
+         )
+
+     def train_dataloader(self) -> DataLoader[ImageTargetTuple]:
+         return DataLoader(
+             self.train_dataset,
+             batch_size=self.batch_size,
+             shuffle=True,
+             collate_fn=collate_fn,
+             num_workers=self.num_workers,
+         )
+
+     def val_dataloader(self) -> DataLoader[ImageTargetTuple]:
+         return DataLoader(
+             self.val_dataset,
+             batch_size=self.batch_size,
+             collate_fn=collate_fn,
+             num_workers=self.num_workers,
+         )
+
+     def test_dataloader(self) -> DataLoader[ImageTargetTuple]:
+         # Placeholder for test data; currently returns the validation dataloader as a default.
+         return DataLoader(
+             self.val_dataset, batch_size=self.batch_size, collate_fn=collate_fn
+         )
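Sketch of using the data module outside a Lightning `Trainer`, e.g. to inspect one training batch. The data directory is a placeholder.

from ellipse_rcnn.utils.data.fddb import FDDBLightningDataModule

dm = FDDBLightningDataModule("data/FDDB", batch_size=8, train_fraction=0.8)
dm.setup()

images, targets = next(iter(dm.train_dataloader()))
# images: tuple of 8 tensors (C, H, W); targets: tuple of 8 TargetDicts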
ellipse_rcnn/utils/types.py ADDED
@@ -0,0 +1,46 @@
+ from typing import TypedDict, NamedTuple
+
+ import torch
+
+
+ class TargetDict(TypedDict):
+     boxes: torch.Tensor
+     labels: torch.Tensor
+     image_id: torch.Tensor
+     area: torch.Tensor
+     iscrowd: torch.Tensor
+     ellipse_matrices: torch.Tensor
+
+
+ class LossDict(TypedDict, total=False):
+     loss_classifier: torch.Tensor
+     loss_box_reg: torch.Tensor
+     loss_objectness: torch.Tensor
+     loss_rpn_box_reg: torch.Tensor
+     loss_ellipse_kld: torch.Tensor
+     loss_ellipse_smooth_l1: torch.Tensor
+     loss_total: torch.Tensor
+
+
+ class PredictionDict(TypedDict):
+     bboxes: torch.Tensor
+     labels: torch.Tensor
+     scores: torch.Tensor
+     ellipse_matrices: torch.Tensor
+
+
+ type ImageTargetTuple = tuple[torch.Tensor, TargetDict]  # Tensor shape: (C, H, W)
+ type CollatedBatchType = tuple[
+     tuple[torch.Tensor, ...], tuple[TargetDict, ...]
+ ]  # Tensor shape: (C, H, W)
+ type UncollatedBatchType = list[ImageTargetTuple]
+
+ type EllipseType = torch.Tensor
+
+
+ class EllipseTuple(NamedTuple):
+     a: float
+     b: float
+     theta: float
+     x: float
+     y: float
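For reference, a `TargetDict` for a single dummy instance can be built as below; this mirrors how the dataset classes above assemble their targets. All tensor values here are made up, and the identity matrix is only a stand-in for a real conic matrix.

import torch
from ellipse_rcnn.utils.types import TargetDict

target = TargetDict(
    boxes=torch.tensor([[10.0, 20.0, 50.0, 60.0]]),  # (N, 4), xyxy format
    labels=torch.ones((1,), dtype=torch.int64),      # single foreground class
    image_id=torch.tensor([0]),
    area=torch.tensor([1600.0]),
    iscrowd=torch.zeros((1,), dtype=torch.int64),
    ellipse_matrices=torch.eye(3).unsqueeze(0),      # (N, 3, 3), dummy conic
)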
ellipse_rcnn/utils/viz.py ADDED
@@ -0,0 +1,106 @@
+ from __future__ import annotations
+
+ from typing import Literal
+ from torch import Tensor
+ import numpy as np
+ import torch
+ from torchvision.ops import boxes as box_ops
+ from matplotlib import pyplot as plt
+ from matplotlib.axes import Axes
+ from matplotlib.collections import EllipseCollection, PatchCollection
+ from matplotlib.patches import Rectangle
+ from ellipse_rcnn.utils.conics import ellipse_angle, conic_center, ellipse_axes
+ from matplotlib.figure import Figure
+
+
+ def plot_single_pred(
+     image: Tensor,
+     prediction,
+     min_score: float = 0.75,
+ ) -> Figure:
+     if isinstance(prediction, list):
+         if len(prediction) > 1:
+             raise ValueError(
+                 "Multiple predictions detected. Please pass a single prediction."
+             )
+         prediction = prediction[0]
+     fig, ax = plt.subplots(1, 1, figsize=(10, 10))
+     fig.patch.set_alpha(0)
+     ax.imshow(image.permute(1, 2, 0), cmap="grey")
+     score_mask = prediction["scores"] > min_score
+
+     plot_ellipses(prediction["ellipse_matrices"][score_mask], ax=ax)
+
+     return fig
+
+
+ def plot_ellipses(
+     A_craters: torch.Tensor,
+     figsize: tuple[float, float] = (15, 15),
+     plot_centers: bool = False,
+     ax: Axes | None = None,
+     rim_color="r",
+     alpha=1.0,
+ ):
+     a_proj, b_proj = ellipse_axes(A_craters)
+     psi_proj = ellipse_angle(A_craters)
+     x_pix_proj, y_pix_proj = conic_center(A_craters)
+
+     a_proj, b_proj, psi_proj, x_pix_proj, y_pix_proj = map(
+         lambda t: t.detach().cpu().numpy(),
+         (a_proj, b_proj, psi_proj, x_pix_proj, y_pix_proj),
+     )
+
+     if ax is None:
+         fig, ax = plt.subplots(figsize=figsize, subplot_kw={"aspect": "equal"})
+
+     ec = EllipseCollection(
+         a_proj * 2,
+         b_proj * 2,
+         np.degrees(psi_proj),
+         units="xy",
+         offsets=np.column_stack((x_pix_proj, y_pix_proj)),
+         transOffset=ax.transData,
+         facecolors="None",
+         edgecolors=rim_color,
+         alpha=alpha,
+     )
+     ax.add_collection(ec)
+
+     if plot_centers:
+         crater_centers = conic_center(A_craters)
+         for k, c_i in enumerate(crater_centers):
+             x, y = c_i[0], c_i[1]
+             ax.text(x.item(), y.item(), str(k), color=rim_color)
+
+
+ def plot_bboxes(
+     boxes: torch.Tensor,
+     box_type: Literal["xyxy", "xywh", "cxcywh"] = "xyxy",
+     figsize: tuple[float, float] = (15, 15),
+     plot_centers: bool = False,
+     ax: Axes | None = None,
+     rim_color="r",
+     alpha=1.0,
+ ):
+     if ax is None:
+         fig, ax = plt.subplots(figsize=figsize, subplot_kw={"aspect": "equal"})
+
+     if box_type != "xyxy":
+         boxes = box_ops.box_convert(boxes, box_type, "xyxy")
+
+     boxes = boxes.detach().cpu().numpy()
+     rectangles = []
+     for k, b_i in enumerate(boxes):
+         x1, y1, x2, y2 = b_i
+         rectangles.append(Rectangle((x1, y1), x2 - x1, y2 - y1))
+
+     collection = PatchCollection(
+         rectangles, edgecolor=rim_color, facecolor="none", alpha=alpha
+     )
+     ax.add_collection(collection)
+
+     if plot_centers:
+         for k, b_i in enumerate(boxes):
+             x1, y1, x2, y2 = b_i
+             ax.text(x1, y1, str(k), color=rim_color)
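Sketch of plotting one prediction. It assumes the model follows the torchvision detection convention of taking a list of image tensors and returning a list of prediction dicts; `model` and `image_tensor` (a normalized (C, H, W) tensor) are assumed to exist already, as set up in app.py.

import torch
from ellipse_rcnn.utils.viz import plot_single_pred

model.eval()
with torch.no_grad():
    predictions = model([image_tensor])  # assumed: list with one prediction dict

fig = plot_single_pred(image_tensor, predictions[0], min_score=0.75)
fig.savefig("prediction.png")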
examples/image1.jpg ADDED
examples/image2.jpg ADDED
examples/image3.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ gradio
+ torch
+ torchvision
+ matplotlib
+ Pillow
+ joblib
+ huggingface_hub
+ lightning
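The dependencies can be installed in the usual way. Note that `h5py` and `pandas`, imported by the optional dataset utilities above, are not listed here and would need to be installed separately to use those modules.

pip install -r requirements.txt
pip install h5py pandas  # only needed for the crater / FDDB dataset utilities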