Spaces:

henry000
/

YOLO

Running

App Files Files Community

henry000 committed on Jun 3, 2024

Commit

597f02f

2 Parent(s): 306fc38 7967aab

🔀 [Merge] branch 'SETUP' into INFERENCE

Browse files

Files changed (4) hide show

yolo/tools/data_augmentation.py +25 -0
yolo/tools/data_loader.py +8 -8
yolo/tools/drawer.py +10 -4
yolo/utils/dataset_utils.py +3 -1

yolo/tools/data_augmentation.py CHANGED Viewed

@@ -10,6 +10,7 @@ class AugmentationComposer:
     def __init__(self, transforms, image_size: int = 640):
         self.transforms = transforms
         self.image_size = image_size
         for transform in self.transforms:
             if hasattr(transform, "set_parent"):
@@ -18,9 +19,33 @@ class AugmentationComposer:
     def __call__(self, image, boxes):
         for transform in self.transforms:
             image, boxes = transform(image, boxes)
         return image, boxes
 class HorizontalFlip:
     """Randomly horizontally flips the image along with the bounding boxes."""

     def __init__(self, transforms, image_size: int = 640):
         self.transforms = transforms
         self.image_size = image_size
+        self.pad_resize = PadAndResize(self.image_size)
         for transform in self.transforms:
             if hasattr(transform, "set_parent"):
     def __call__(self, image, boxes):
         for transform in self.transforms:
             image, boxes = transform(image, boxes)
+        image, boxes = self.pad_resize(image, boxes)
         return image, boxes
+class PadAndResize:  # Pads an image onto a centered white square, resizes it to image_size x image_size, and remaps the boxes to match.
+    def __init__(self, image_size):  # image_size: side length of the square output image
+        """Initialize the object with the target image size."""
+        self.image_size = image_size
+    def __call__(self, image, boxes):  # Returns (resized image, boxes); note that `boxes` is modified in place.
+        original_size = max(image.size)  # largest image dimension; used as the side of the square canvas
+        scale = self.image_size / original_size  # resize factor from the square canvas to the target size
+        square_img = Image.new("RGB", (original_size, original_size), (255, 255, 255))  # white square canvas
+        left = (original_size - image.width) // 2  # horizontal offset that centers the image on the canvas
+        top = (original_size - image.height) // 2  # vertical offset that centers the image on the canvas
+        square_img.paste(image, (left, top))
+        resized_img = square_img.resize((self.image_size, self.image_size))
+        boxes[:, 1] = (boxes[:, 1] + left) * scale  # columns 1 and 3 are shifted by `left`, columns 2 and 4 by `top`, then all are scaled
+        boxes[:, 2] = (boxes[:, 2] + top) * scale  # presumably rows are (class, x_min, y_min, x_max, y_max) in pixels; TODO confirm
+        boxes[:, 3] = (boxes[:, 3] + left) * scale  # NOTE(review): adding a pixel offset assumes un-normalized coordinates; verify against caller
+        boxes[:, 4] = (boxes[:, 4] + top) * scale
+        return resized_img, boxes
 class HorizontalFlip:
     """Randomly horizontally flips the image along with the bounding boxes."""

yolo/tools/data_loader.py CHANGED Viewed

@@ -32,8 +32,7 @@ from yolo.utils.dataset_utils import (
 class YoloDataset(Dataset):
     def __init__(self, config: TrainConfig, phase: str = "train2017", image_size: int = 640):
         augment_cfg = config.data.data_augment
-        # TODO: add yaml -> train: train2017
-        phase_name = config.dataset.auto_download.get(phase, phase)
         self.image_size = image_size
         transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
@@ -102,13 +101,14 @@ class YoloDataset(Dataset):
                     continue
                 with open(label_path, "r") as file:
                     image_seg_annotations = [list(map(float, line.strip().split())) for line in file]
             labels = self.load_valid_labels(image_id, image_seg_annotations)
-            if labels is not None:
-                img_path = path.join(images_path, image_name)
-                data.append((img_path, labels))
-                valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data
@@ -135,7 +135,7 @@ class YoloDataset(Dataset):
             return torch.stack(bboxes)
         else:
             logger.warning("No valid BBox in {}", label_path)
-            return None
     def get_data(self, idx):
         img_path, bboxes = self.data[idx]
@@ -161,7 +161,7 @@ class YoloDataLoader(DataLoader):
     def __init__(self, config: Config):
         """Initializes the YoloDataLoader with hydra-config files."""
         data_cfg = config.task.data
-        dataset = YoloDataset(config.task)
         super().__init__(
             dataset,

 class YoloDataset(Dataset):
     def __init__(self, config: TrainConfig, phase: str = "train2017", image_size: int = 640):
         augment_cfg = config.data.data_augment
+        phase_name = config.dataset.get(phase, phase)
         self.image_size = image_size
         transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
                     continue
                 with open(label_path, "r") as file:
                     image_seg_annotations = [list(map(float, line.strip().split())) for line in file]
+            else:
+                image_seg_annotations = []
             labels = self.load_valid_labels(image_id, image_seg_annotations)
+            img_path = path.join(images_path, image_name)
+            data.append((img_path, labels))
+            valid_inputs += 1
         logger.info("Recorded {}/{} valid inputs", valid_inputs, len(images_list))
         return data
             return torch.stack(bboxes)
         else:
             logger.warning("No valid BBox in {}", label_path)
+            return torch.zeros((0, 5))
     def get_data(self, idx):
         img_path, bboxes = self.data[idx]
     def __init__(self, config: Config):
         """Initializes the YoloDataLoader with hydra-config files."""
         data_cfg = config.task.data
+        dataset = YoloDataset(config.task, config.task.task)
         super().__init__(
             dataset,

yolo/tools/drawer.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from typing import List, Union
 import numpy as np
@@ -8,7 +9,11 @@ from torchvision.transforms.functional import to_pil_image
 def draw_bboxes(
-    img: Union[Image.Image, torch.Tensor], bboxes: List[List[Union[int, float]]], *, scaled_bbox: bool = True
 ):
     """
     Draw bounding boxes on an image.
@@ -21,7 +26,7 @@ def draw_bboxes(
     # Convert tensor image to PIL Image if necessary
     if isinstance(img, torch.Tensor):
         if img.dim() > 3:
-            logger.info("Multi-frame tensor detected, using the first image.")
             img = img[0]
             bboxes = bboxes[0]
         img = to_pil_image(img)
@@ -41,8 +46,9 @@ def draw_bboxes(
         draw.rectangle(shape, outline="red", width=3)
         draw.text((x_min, y_min), str(int(class_id)), font=font, fill="blue")
-    img.save("visualize.jpg")  # Save the image with annotations
-    logger.info("Saved visualize image at visualize.png")
     return img

+import os
 from typing import List, Union
 import numpy as np
 def draw_bboxes(
+    img: Union[Image.Image, torch.Tensor],
+    bboxes: List[List[Union[int, float]]],
+    *,
+    scaled_bbox: bool = True,
+    save_path: str = "",
 ):
     """
     Draw bounding boxes on an image.
     # Convert tensor image to PIL Image if necessary
     if isinstance(img, torch.Tensor):
         if img.dim() > 3:
+            logger.warning("🔍 Multi-frame tensor detected, using the first image.")
             img = img[0]
             bboxes = bboxes[0]
         img = to_pil_image(img)
         draw.rectangle(shape, outline="red", width=3)
         draw.text((x_min, y_min), str(int(class_id)), font=font, fill="blue")
+    save_image_path = os.path.join(save_path, "visualize.png")
+    img.save(save_image_path)  # Save the image with annotations
+    logger.info(f"💾 Saved visualize image at {save_image_path}")
     return img

yolo/utils/dataset_utils.py CHANGED Viewed

@@ -5,6 +5,7 @@ from os import path
 from typing import Any, Dict, List, Optional, Tuple
 import numpy as np
 from yolo.tools.data_conversion import discretize_categories
@@ -32,7 +33,8 @@ def locate_label_paths(dataset_path: str, phase_name: str):
         if txt_files:
             return txt_labels_path, "txt"
-    raise FileNotFoundError("No labels found in the specified dataset path and phase name.")
 def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str, Dict]]:

 from typing import Any, Dict, List, Optional, Tuple
 import numpy as np
+from loguru import logger
 from yolo.tools.data_conversion import discretize_categories
         if txt_files:
             return txt_labels_path, "txt"
+    logger.warning("No labels found in the specified dataset path and phase name.")
+    return [], None
 def create_image_metadata(labels_path: str) -> Tuple[Dict[str, List], Dict[str, Dict]]: