lucytuan committed
Commit 4ebaf9d · Parents: e3d53d5, 23db031

Merge branch 'DATASET' of https://github.com/WongKinYiu/yolov9mit into DATASET

.github/workflows/main.yaml ADDED
@@ -0,0 +1,26 @@
+name: YOLOv9 - Model test
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python 3.8
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+      - name: Test with pytest
+        run: |
+          pytest
.pre-commit-config.yaml CHANGED
@@ -6,3 +6,9 @@ repos:
         language_version: python3 # Specify the Python version
         exclude: '.*\.yaml$' # Regex pattern to exclude all YAML files
         args: ["--line-length", "120"] # Set max line length to 100 characters
+
+  - repo: https://github.com/pre-commit/mirrors-isort
+    rev: v5.10.1 # Use the appropriate version or "stable" for the latest stable release
+    hooks:
+      - id: isort
+        args: ["--profile", "black", "--verbose"]
README.md CHANGED
@@ -1,6 +1,20 @@
 # YOLOv9-MIT
 An MIT license rewrite of YOLOv9
 
+![WIP](https://img.shields.io/badge/status-WIP-orange)
+> [!IMPORTANT]
+> This project is currently a Work In Progress and may undergo significant changes. It is not recommended for use in production environments until further notice. Please check back regularly for updates.
+>
+> Use of this code is at your own risk and discretion. It is advisable to consult with the project owner before deploying or integrating into any critical systems.
+
+## Contributing
+
+While the project's structure is still being finalized, we ask that potential contributors wait for these foundational decisions to be made. We greatly appreciate your patience and are excited to welcome contributions from the community once we are ready. Alternatively, you are welcome to propose functions that should be implemented based on the original YOLO version or suggest other enhancements!
+
+If you are interested in contributing, please keep an eye on project updates or contact us directly at [[email protected]](mailto:[email protected]) for more information.
+
+
+
 ## To-Do Lists
 - [ ] Project Setup
 - [X] requirements
@@ -16,7 +30,7 @@ An MIT license rewrite of YOLOv9
 - [ ] Auto Download
 - [ ] xywh, xxyy, xcyc
 - [ ] Dataloder
-- [ ] Data arugment
+- [ ] Data augment
 - [ ] Model
 - [ ] load model
 - [ ] from yaml
config/config.py CHANGED
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import List, Dict, Union
+from typing import Dict, List, Union
 
 
 @dataclass
config/config.yaml CHANGED
@@ -7,4 +7,5 @@ defaults:
   - download: ../data/download
   - augmentation: ../data/augmentation
   - model: v7-base
+  - hyper: default
   - _self_
config/data/augmentation.yaml CHANGED
@@ -1,3 +1,3 @@
 Mosaic: 1
-MixUp: 1
-RandomHorizontalFlip: 0.5
+# MixUp: 1
+HorizontalFlip: 0.5
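
Note: these keys are resolved by name into transform classes when the dataset is built (see utils/dataloader.py below), so `HorizontalFlip` here must match the class renamed in utils/data_augment.py. A minimal sketch of that resolution, assuming the parsed YAML is a plain mapping:

    # Hypothetical illustration of how the YAML keys become transform objects,
    # mirroring the eval(...) loop added in utils/dataloader.py.
    from utils.data_augment import Compose, HorizontalFlip, Mosaic

    augment_cfg = {"Mosaic": 1, "HorizontalFlip": 0.5}  # parsed augmentation.yaml
    transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
    transform = Compose(transforms, image_size=640)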
config/hyper/default.yaml ADDED
@@ -0,0 +1,5 @@
+data:
+  batch_size: 4
+  shuffle: True
+  num_workers: 4
+  pin_memory: True
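
Note: the new YoloDataLoader in utils/dataloader.py forwards these fields unchanged to torch.utils.data.DataLoader. A sketch of that pass-through, assuming `cfg` is the composed Hydra config and using a stand-in dataset:

    # Sketch: config.hyper.data maps one-to-one onto DataLoader kwargs.
    import torch
    from torch.utils.data import DataLoader, TensorDataset

    hyper = cfg.hyper.data                                  # cfg assumed composed by Hydra
    dataset = TensorDataset(torch.zeros(16, 3, 640, 640))   # stand-in dataset
    loader = DataLoader(
        dataset,
        batch_size=hyper.batch_size,    # 4
        shuffle=hyper.shuffle,          # True
        num_workers=hyper.num_workers,  # 4
        pin_memory=hyper.pin_memory,    # True
    )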
config/model/v7-base.yaml CHANGED
@@ -241,3 +241,4 @@ model:
       - [36,75, 76,55, 72,146] # P4/16
       - [142,110, 192,243, 459,401] # P5/32
     source: [102, 103, 104]
+    output: True
model/module.py CHANGED
@@ -11,10 +11,10 @@ class Conv(nn.Module):
         out_channels,
         kernel_size,
         stride=1,
-        padding=0,
+        padding=None,
         dilation=1,
         groups=1,
-        act=nn.ReLU(),
+        act=nn.SiLU(),
         bias=False,
         auto_padding=True,
         padding_mode="zeros",
@@ -48,10 +48,12 @@ class Conv(nn.Module):
 # RepVGG
 class RepConv(nn.Module):
     # https://github.com/DingXiaoH/RepVGG
-    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, groups=1, act=nn.ReLU()):
+    def __init__(
+        self, in_channels, out_channels, kernel_size=3, padding=None, stride=1, groups=1, act=nn.SiLU(), deploy=False
+    ):
 
         super().__init__()
-
+        self.deploy = deploy
         self.conv1 = Conv(in_channels, out_channels, kernel_size, stride, groups=groups, act=False)
         self.conv2 = Conv(in_channels, out_channels, 1, stride, groups=groups, act=False)
         self.act = act if isinstance(act, nn.Module) else nn.Identity()
@@ -64,6 +66,30 @@ class RepConv(nn.Module):
 
     # to be implement
     # def fuse_convs(self):
+    def fuse_conv_bn(self, conv, bn):
+
+        std = (bn.running_var + bn.eps).sqrt()
+        bias = bn.bias - bn.running_mean * bn.weight / std
+
+        t = (bn.weight / std).reshape(-1, 1, 1, 1)
+        weights = conv.weight * t
+
+        bn = nn.Identity()
+        conv = nn.Conv2d(
+            in_channels=conv.in_channels,
+            out_channels=conv.out_channels,
+            kernel_size=conv.kernel_size,
+            stride=conv.stride,
+            padding=conv.padding,
+            dilation=conv.dilation,
+            groups=conv.groups,
+            bias=True,
+            padding_mode=conv.padding_mode,
+        )
+
+        conv.weight = torch.nn.Parameter(weights)
+        conv.bias = torch.nn.Parameter(bias)
+        return conv
 
 
 # ResNet
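
Note: fuse_conv_bn folds a BatchNorm's affine transform into the preceding convolution: with running statistics (μ, σ²) and affine parameters (γ, β), the fused weights are W' = W·γ/σ and the fused bias is b' = β − μ·γ/σ. A quick self-contained equivalence check of that math (a sketch, not part of the commit):

    # Verify BN folding: bn(conv(x)) == fused(x) with
    # W' = W * gamma / sigma, b' = beta - mu * gamma / sigma
    import torch
    import torch.nn as nn

    conv = nn.Conv2d(8, 16, 3, padding=1, bias=False)
    bn = nn.BatchNorm2d(16).eval()
    with torch.no_grad():
        bn.running_mean.uniform_(-1, 1)   # pretend the BN was trained
        bn.running_var.uniform_(0.5, 2.0)

    std = (bn.running_var + bn.eps).sqrt()
    fused = nn.Conv2d(8, 16, 3, padding=1, bias=True)
    with torch.no_grad():
        fused.weight.copy_(conv.weight * (bn.weight / std).reshape(-1, 1, 1, 1))
        fused.bias.copy_(bn.bias - bn.running_mean * bn.weight / std)

    x = torch.randn(1, 8, 32, 32)
    assert torch.allclose(bn(conv(x)), fused(x), atol=1e-5)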
model/yolo.py CHANGED
@@ -4,6 +4,7 @@ import torch
 import torch.nn as nn
 from loguru import logger
 from omegaconf import OmegaConf
+
 from tools.layer_helper import get_layer_map
 
 
@@ -32,6 +33,7 @@ class YOLO(nn.Module):
             layer_type, layer_info = next(iter(layer_spec.items()))
             layer_args = layer_info.get("args", {})
             source = layer_info.get("source", -1)
+            output = layer_info.get("output", False)
 
             if isinstance(source, str):
                 source = layer_indices_by_tag[source]
@@ -41,7 +43,7 @@ class YOLO(nn.Module):
                 layer_args["nc"] = self.nc
                 layer_args["ch"] = [output_dim[idx] for idx in source]
 
-            layer = self.create_layer(layer_type, source, **layer_args)
+            layer = self.create_layer(layer_type, source, output, **layer_args)
             model_list.append(layer)
 
             if "tags" in layer_info:
@@ -55,6 +57,7 @@ class YOLO(nn.Module):
 
     def forward(self, x):
         y = [x]
+        output = []
         for layer in self.model:
             if OmegaConf.is_list(layer.source):
                 model_input = [y[idx] for idx in layer.source]
@@ -62,7 +65,9 @@ class YOLO(nn.Module):
                 model_input = y[layer.source]
             x = layer(model_input)
             y.append(x)
-        return x
+            if layer.output:
+                output.append(x)
+        return output
 
     def get_out_channels(self, layer_type: str, layer_args: dict, output_dim: list, source: Union[int, list]):
         if "Conv" in layer_type:
@@ -74,10 +79,11 @@ class YOLO(nn.Module):
         if layer_type == "IDetect":
             return None
 
-    def create_layer(self, layer_type: str, source: Union[int, list], **kwargs):
+    def create_layer(self, layer_type: str, source: Union[int, list], output=False, **kwargs):
         if layer_type in self.layer_map:
             layer = self.layer_map[layer_type](**kwargs)
             layer.source = source
+            layer.output = output
             return layer
         else:
             raise ValueError(f"Unsupported layer type: {layer_type}")
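
Note: forward no longer returns only the last layer's tensor; it collects every layer whose spec sets `output: True` (e.g. the detection head tagged in config/model/v7-base.yaml above) and returns them as a list. A usage sketch, assuming a Hydra-composed model config:

    # Sketch: the model now yields a list with one entry per output-tagged layer.
    import torch
    from model.yolo import get_model

    model = get_model(model_cfg)               # model_cfg assumed composed by Hydra
    outputs = model(torch.rand(2, 3, 640, 640))
    head_out = outputs[-1]                     # the layer marked output: True (detection head)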
requirements.txt CHANGED
@@ -1,11 +1,13 @@
 hydra-core
 loguru
 numpy
+Pillow
 pytest
 pyyaml
 requests
 rich
 torch
+torchvision
 tqdm
 Pillow
 diskcache
tests/test_model/test_yolo.py ADDED
@@ -0,0 +1,48 @@
+import sys
+
+import pytest
+import torch
+from hydra import compose, initialize
+from hydra.core.global_hydra import GlobalHydra
+from omegaconf import DictConfig, OmegaConf
+
+sys.path.append("./")
+from model.yolo import YOLO, get_model
+
+config_path = "../../config/model"
+config_name = "v7-base"
+
+
+def test_build_model():
+
+    with initialize(config_path=config_path, version_base=None):
+        model_cfg = compose(config_name=config_name)
+        OmegaConf.set_struct(model_cfg, False)
+        model = YOLO(model_cfg)
+        model.build_model(model_cfg.model)
+        assert len(model.model) == 106
+
+
+def test_get_model():
+    with initialize(config_path=config_path, version_base=None):
+        model_cfg = compose(config_name=config_name)
+        model = get_model(model_cfg)
+        assert isinstance(model, YOLO)
+
+
+def test_yolo_forward_output_shape():
+    with initialize(config_path=config_path, version_base=None):
+        model_cfg = compose(config_name=config_name)
+
+        model = get_model(model_cfg)
+        # 2 - batch size, 3 - number of channels, 640x640 - image dimensions
+        dummy_input = torch.rand(2, 3, 640, 640)
+
+        # Forward pass through the model
+        output = model(dummy_input)
+        output_shape = [x.shape for x in output[-1]]
+        assert output_shape == [
+            torch.Size([2, 3, 20, 20, 85]),
+            torch.Size([2, 3, 80, 80, 85]),
+            torch.Size([2, 3, 40, 40, 85]),
+        ]
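
Note: the expected shapes follow directly from the anchor layout: 3 anchors per scale, a 640/stride grid per scale, and 85 attributes per anchor, presumably 4 box coordinates + 1 objectness + 80 COCO classes. The arithmetic, spelled out:

    # Where the expected output shapes come from (assuming 80 COCO classes):
    batch, num_anchors = 2, 3
    num_attrib = 4 + 1 + 80   # box (4) + objectness (1) + classes (80) = 85
    for stride in (32, 8, 16):
        grid = 640 // stride  # 20, 80, 40
        print((batch, num_anchors, grid, grid, num_attrib))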
tests/test_utils/test_dataaugment.py ADDED
@@ -0,0 +1,64 @@
+import sys
+
+import pytest
+import torch
+from PIL import Image
+from torchvision.transforms import functional as TF
+
+sys.path.append("./")
+from utils.data_augment import Compose, Mosaic, RandomHorizontalFlip
+
+
+def test_random_horizontal_flip():
+    # Create a mock image and bounding boxes
+    img = Image.new("RGB", (100, 100), color="red")
+    boxes = torch.tensor([[1, 0.1, 0.1, 0.9, 0.9]])  # class, xmin, ymin, xmax, ymax
+
+    flip_transform = RandomHorizontalFlip(prob=1)  # Set probability to 1 to ensure flip
+    flipped_img, flipped_boxes = flip_transform(img, boxes)
+
+    # Assert image is flipped by comparing it to a manually flipped image
+    assert TF.hflip(img) == flipped_img
+
+    # Assert bounding boxes are flipped correctly
+    expected_boxes = torch.tensor([[1, 0.1, 0.1, 0.9, 0.9]])
+    expected_boxes[:, [1, 3]] = 1 - expected_boxes[:, [3, 1]]
+    assert torch.allclose(flipped_boxes, expected_boxes), "Bounding boxes were not flipped correctly"
+
+
+def test_compose():
+    # Define two mock transforms that simply return the inputs
+    def mock_transform(image, boxes):
+        return image, boxes
+
+    compose = Compose([mock_transform, mock_transform])
+    img = Image.new("RGB", (10, 10), color="blue")
+    boxes = torch.tensor([[0, 0.2, 0.2, 0.8, 0.8]])
+
+    transformed_img, transformed_boxes = compose(img, boxes)
+
+    assert transformed_img == img, "Image should not be altered"
+    assert torch.equal(transformed_boxes, boxes), "Boxes should not be altered"
+
+
+def test_mosaic():
+    img = Image.new("RGB", (100, 100), color="green")
+    boxes = torch.tensor([[0, 0.25, 0.25, 0.75, 0.75]])
+
+    # Mock parent with image_size and get_more_data method
+    class MockParent:
+        image_size = 100
+
+        def get_more_data(self, num_images):
+            return [(img, boxes) for _ in range(num_images)]
+
+    mosaic = Mosaic(prob=1)  # Ensure mosaic is applied
+    mosaic.set_parent(MockParent())
+
+    mosaic_img, mosaic_boxes = mosaic(img, boxes)
+
+    # Checks here would depend on the exact expected behavior of the mosaic function,
+    # such as dimensions and content of the output image and boxes.
+
+    assert mosaic_img.size == (200, 200), "Mosaic image size should be doubled"
+    assert len(mosaic_boxes) > 0, "Should have some bounding boxes"
tools/layer_helper.py CHANGED
@@ -1,5 +1,7 @@
 import inspect
+
 import torch.nn as nn
+
 from model import module
 
 
tools/log_helper.py CHANGED
@@ -12,6 +12,7 @@ Example:
 """
 
 import sys
+
 from loguru import logger
 
 
train.py CHANGED
@@ -1,13 +1,16 @@
+import hydra
 from loguru import logger
+
+from config.config import Config
 from model.yolo import get_model
 from tools.log_helper import custom_logger
+from utils.dataloader import YoloDataset
 from utils.get_dataset import prepare_dataset
-import hydra
-from config.config import Config
 
 
 @hydra.main(config_path="config", config_name="config", version_base=None)
 def main(cfg: Config):
+    dataset = YoloDataset(cfg)
     if cfg.download.auto:
         prepare_dataset(cfg.download)
 
utils/data_augment.py CHANGED
@@ -1,15 +1,15 @@
-from PIL import Image
 import numpy as np
 import torch
+from PIL import Image
 from torchvision.transforms import functional as TF
-from torchvision.transforms.functional import to_tensor, to_pil_image
 
 
 class Compose:
     """Composes several transforms together."""
 
-    def __init__(self, transforms):
+    def __init__(self, transforms, image_size: int = 640):
         self.transforms = transforms
+        self.image_size = image_size
 
         for transform in self.transforms:
             if hasattr(transform, "set_parent"):
@@ -20,11 +20,8 @@ class Compose:
             image, boxes = transform(image, boxes)
         return image, boxes
 
-    def get_more_data(self):
-        raise NotImplementedError("This method should be overridden by subclass instances!")
-
 
-class RandomHorizontalFlip:
+class HorizontalFlip:
     """Randomly horizontally flips the image along with the bounding boxes."""
 
     def __init__(self, prob=0.5):
@@ -37,7 +34,7 @@ class RandomHorizontalFlip:
         return image, boxes
 
 
-class RandomVerticalFlip:
+class VerticalFlip:
     """Randomly vertically flips the image along with the bounding boxes."""
 
     def __init__(self, prob=0.5):
@@ -90,6 +87,7 @@ class Mosaic:
         all_labels.append(adjusted_boxes)
 
         all_labels = torch.cat(all_labels, dim=0)
+        mosaic_image = mosaic_image.resize((img_sz, img_sz))
        return mosaic_image, all_labels
 
 
@@ -118,10 +116,10 @@ class MixUp:
         lam = np.random.beta(self.alpha, self.alpha) if self.alpha > 0 else 0.5
 
         # Mix images
-        image1, image2 = to_tensor(image), to_tensor(image2)
+        image1, image2 = TF.to_tensor(image), TF.to_tensor(image2)
         mixed_image = lam * image1 + (1 - lam) * image2
 
         # Mix bounding boxes
         mixed_boxes = torch.cat([lam * boxes, (1 - lam) * boxes2])
 
-        return to_pil_image(mixed_image), mixed_boxes
+        return TF.to_pil_image(mixed_image), mixed_boxes
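
Note: MixUp blends two samples with a Beta(α, α)-distributed coefficient, mixed = λ·image1 + (1−λ)·image2, and scales the two box sets by λ and 1−λ before concatenating them. A minimal usage sketch of the renamed transforms:

    # Sketch: applying the composed augmentations to one dummy sample.
    import torch
    from PIL import Image
    from utils.data_augment import Compose, HorizontalFlip

    img = Image.new("RGB", (640, 640))
    boxes = torch.tensor([[0, 0.25, 0.25, 0.75, 0.75]])  # class, xmin, ymin, xmax, ymax

    transform = Compose([HorizontalFlip(prob=1.0)], image_size=640)
    aug_img, aug_boxes = transform(img, boxes)            # boxes mirrored in x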
utils/dataloader.py CHANGED
@@ -1,26 +1,30 @@
-from PIL import Image
-from os import path, listdir
+from os import listdir, path
+from typing import List, Tuple, Union
 
+import diskcache as dc
 import hydra
 import numpy as np
 import torch
-from torch.utils.data import Dataset
 from loguru import logger
+from PIL import Image
+from torch.utils.data import DataLoader, Dataset
+from torchvision.transforms import functional as TF
 from tqdm.rich import tqdm
-import diskcache as dc
-from typing import Union
-from drawer import draw_bboxes
-from data_augment import Compose, RandomHorizontalFlip, RandomVerticalFlip, Mosaic, MixUp
+
+from utils.data_augment import Compose, HorizontalFlip, MixUp, Mosaic, VerticalFlip
+from utils.drawer import draw_bboxes
 
 
 class YoloDataset(Dataset):
-    def __init__(self, dataset_cfg: dict, phase: str = "train", image_size: int = 640, transform=None):
+    def __init__(self, config: dict, phase: str = "train", image_size: int = 640):
+        dataset_cfg = config.data
+        augment_cfg = config.augmentation
         phase_name = dataset_cfg.get(phase, phase)
         self.image_size = image_size
 
-        self.transform = transform
+        transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
+        self.transform = Compose(transforms, self.image_size)
         self.transform.get_more_data = self.get_more_data
-        self.transform.image_size = self.image_size
         self.data = self.load_data(dataset_cfg.path, phase_name)
 
     def load_data(self, dataset_path, phase_name):
@@ -121,17 +125,55 @@ class YoloDataset(Dataset):
         img, bboxes = self.get_data(idx)
         if self.transform:
             img, bboxes = self.transform(img, bboxes)
+        img = TF.to_tensor(img)
         return img, bboxes
 
     def __len__(self) -> int:
         return len(self.data)
 
 
+class YoloDataLoader(DataLoader):
+    def __init__(self, config: dict):
+        """Initializes the YoloDataLoader with hydra-config files."""
+        hyper = config.hyper.data
+        dataset = YoloDataset(config)
+
+        super().__init__(
+            dataset,
+            batch_size=hyper.batch_size,
+            shuffle=hyper.shuffle,
+            num_workers=hyper.num_workers,
+            pin_memory=hyper.pin_memory,
+            collate_fn=self.collate_fn,
+        )
+
+    def collate_fn(self, batch: List[Tuple[torch.Tensor, torch.Tensor]]) -> Tuple[torch.Tensor, List[torch.Tensor]]:
+        """
+        A collate function to handle batching of images and their corresponding targets.
+
+        Args:
+            batch (list of tuples): Each tuple contains:
+                - image (torch.Tensor): The image tensor.
+                - labels (torch.Tensor): The tensor of labels for the image.
+
+        Returns:
+            Tuple[torch.Tensor, List[torch.Tensor]]: A tuple containing:
+                - A tensor of batched images.
+                - A list of tensors, each corresponding to bboxes for each image in the batch.
+        """
+        images = torch.stack([item[0] for item in batch])
+        targets = [item[1] for item in batch]
+        return images, targets
+
+
+def get_dataloader(config):
+    return YoloDataLoader(config)
+
+
 @hydra.main(config_path="../config", config_name="config", version_base=None)
 def main(cfg):
-    transform = Compose([eval(aug)(prob) for aug, prob in cfg.augmentation.items()])
-    dataset = YoloDataset(cfg.data, transform=transform)
-    draw_bboxes(*dataset[0])
+    dataloader = get_dataloader(cfg)
+    draw_bboxes(next(iter(dataloader)))
 
 
 if __name__ == "__main__":
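
Note: the custom collate_fn exists because all images share one size and can be stacked into a single tensor, while the number of boxes varies per image, so targets stay a Python list rather than a padded tensor. A sketch of the resulting batch structure, assuming the default hyper config above (batch_size 4, 640×640 inputs):

    # Sketch: shape of one batch from YoloDataLoader.
    images, targets = next(iter(dataloader))
    print(images.shape)      # torch.Size([4, 3, 640, 640]) - stacked image tensor
    print(len(targets))      # 4 - one bbox tensor per image
    print(targets[0].shape)  # (n_boxes, 5): class, xmin, ymin, xmax, ymax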
utils/drawer.py CHANGED
@@ -1,23 +1,31 @@
+from typing import List, Union
+
+import torch
+from loguru import logger
 from PIL import Image, ImageDraw, ImageFont
+from torchvision.transforms.functional import to_pil_image
 
 
-def draw_bboxes(img, bboxes):
+def draw_bboxes(img: Union[Image.Image, torch.Tensor], bboxes: List[List[Union[int, float]]]):
     """
     Draw bounding boxes on an image.
 
     Args:
-    - image_path (str): Path to the image file.
-    - bboxes (list of lists/tuples): Bounding boxes with [x_min, y_min, x_max, y_max, class_id].
+    - img (PIL Image or torch.Tensor): Image on which to draw the bounding boxes.
+    - bboxes (List of Lists/Tensors): Bounding boxes with [class_id, x_min, y_min, x_max, y_max],
+      where coordinates are normalized [0, 1].
     """
-    # Load an image
-    draw = ImageDraw.Draw(img)
+    # Convert tensor image to PIL Image if necessary
+    if isinstance(img, torch.Tensor):
+        if img.dim() > 3:
+            logger.info("Multi-frame tensor detected, using the first image.")
+            img = img[0]
+            bboxes = bboxes[0]
+        img = to_pil_image(img)
 
-    # Font for class_id (optional)
-    try:
-        font = ImageFont.truetype("arial.ttf", 30)
-    except IOError:
-        font = ImageFont.load_default(30)
+    draw = ImageDraw.Draw(img)
     width, height = img.size
+    font = ImageFont.load_default(30)
 
     for bbox in bboxes:
         class_id, x_min, y_min, x_max, y_max = bbox
@@ -26,7 +34,8 @@ def draw_bboxes(img, bboxes):
         y_min = y_min * height
         y_max = y_max * height
         shape = [(x_min, y_min), (x_max, y_max)]
-        draw.rectangle(shape, outline="red", width=2)
+        draw.rectangle(shape, outline="red", width=3)
         draw.text((x_min, y_min), str(int(class_id)), font=font, fill="blue")
 
-        img.save("output.jpg")
+    img.save("visualize.jpg")  # Save the image with annotations
+    logger.info("Saved visualize image at visualize.png")
utils/get_dataset.py CHANGED
@@ -2,8 +2,8 @@ import os
 import zipfile
 
 import hydra
-from loguru import logger
 import requests
+from loguru import logger
 from tqdm.rich import tqdm
 
 