henry000 committed
Commit aba5422 · 1 Parent(s): e78c98b

✨ [Add] dynamic image size loader

yolo/config/config.py CHANGED
@@ -59,6 +59,7 @@ class DataConfig:
     image_size: List[int]
     data_augment: Dict[str, int]
     source: Optional[Union[str, int]]
+    dynamic_shape: Optional[bool]
 
 
 @dataclass
yolo/config/task/validation.yaml CHANGED
@@ -1,12 +1,13 @@
 task: validation
 
 data:
-  batch_size: 16
+  batch_size: 32
   image_size: ${image_size}
   cpu_num: ${cpu_num}
   shuffle: False
   pin_memory: True
   data_augment: {}
+  dynamic_shape: True
 nms:
   min_confidence: 0.0001
   min_iou: 0.7
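
Note: dynamic_shape is declared Optional in DataConfig, so existing configs that omit it still load. A minimal illustrative sketch (the reduced dataclass and its default value below are assumptions, not code from this commit) of reading the flag with a fallback, mirroring the getattr guard added in the data loader below:

# Illustrative sketch only: a reduced stand-in for DataConfig with the new flag.
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class DataConfigSketch:
    image_size: List[int]
    data_augment: Dict[str, int]
    dynamic_shape: Optional[bool] = None  # default exists only in this sketch

cfg = DataConfigSketch(image_size=[640, 640], data_augment={}, dynamic_shape=True)
use_dynamic = getattr(cfg, "dynamic_shape", True)  # falls back to True if the field is absent
print(use_dynamic)  # True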
yolo/tools/data_augmentation.py CHANGED
@@ -1,3 +1,5 @@
+from typing import List
+
 import numpy as np
 import torch
 from PIL import Image
@@ -10,8 +12,7 @@ class AugmentationComposer:
     def __init__(self, transforms, image_size: int = [640, 640]):
         self.transforms = transforms
         # TODO: handle List of image_size [640, 640]
-        self.image_size = image_size
-        self.pad_resize = PadAndResize(self.image_size)
+        self.pad_resize = PadAndResize(image_size)
 
         for transform in self.transforms:
             if hasattr(transform, "set_parent"):
@@ -57,6 +58,9 @@ class PadAndResize:
         self.target_width, self.target_height = image_size
         self.background_color = background_color
 
+    def set_size(self, image_size: List[int]):
+        self.target_width, self.target_height = image_size
+
     def __call__(self, image: Image, boxes):
         img_width, img_height = image.size
         scale = min(self.target_width / img_width, self.target_height / img_height)
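
The point of set_size is that the composer's single PadAndResize instance can be retargeted per batch instead of being rebuilt. A trimmed, self-contained sketch of that behaviour (the resize/paste body, the missing boxes argument, and the background colour are simplified placeholders; only the set_size mechanics mirror the diff):

# Trimmed sketch of PadAndResize retargeting; not the project's full transform.
from PIL import Image

class PadAndResizeSketch:
    def __init__(self, image_size, background_color=(114, 114, 114)):
        self.target_width, self.target_height = image_size
        self.background_color = background_color

    def set_size(self, image_size):
        # Same one-liner as the new method in the diff.
        self.target_width, self.target_height = image_size

    def __call__(self, image):
        img_width, img_height = image.size
        scale = min(self.target_width / img_width, self.target_height / img_height)
        resized = image.resize((int(img_width * scale), int(img_height * scale)))
        canvas = Image.new("RGB", (self.target_width, self.target_height), self.background_color)
        canvas.paste(resized, (0, 0))
        return canvas

pad = PadAndResizeSketch([640, 640])
pad.set_size([704, 576])                       # retarget for a wide batch
print(pad(Image.new("RGB", (800, 600))).size)  # (704, 576)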
yolo/tools/data_loader.py CHANGED
@@ -1,5 +1,6 @@
1
  from pathlib import Path
2
  from queue import Empty, Queue
 
3
  from threading import Event, Thread
4
  from typing import Generator, List, Tuple, Union
5
 
@@ -28,12 +29,14 @@ class YoloDataset(Dataset):
28
  augment_cfg = data_cfg.data_augment
29
  self.image_size = data_cfg.image_size
30
  phase_name = dataset_cfg.get(phase, phase)
 
 
 
31
 
32
  transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
33
  self.transform = AugmentationComposer(transforms, self.image_size)
34
  self.transform.get_more_data = self.get_more_data
35
- img_paths, bboxes = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))
36
- self.img_paths, self.bboxes = img_paths, bboxes
37
 
38
  def load_data(self, dataset_path: Path, phase_name: str):
39
  """
@@ -102,8 +105,13 @@ class YoloDataset(Dataset):
102
  labels = self.load_valid_labels(image_id, image_seg_annotations)
103
 
104
  img_path = images_path / image_name
105
- data.append((img_path, labels))
 
 
106
  valid_inputs += 1
 
 
 
107
  logger.info(f"Recorded {valid_inputs}/{len(images_list)} valid inputs")
108
  return data
109
 
@@ -143,8 +151,22 @@ class YoloDataset(Dataset):
143
  indices = torch.randint(0, len(self), (num,))
144
  return [self.get_data(idx)[:2] for idx in indices]
145
 
 
 
 
 
 
 
 
 
 
 
146
  def __getitem__(self, idx) -> Tuple[Image.Image, Tensor, Tensor, List[str]]:
147
  img, bboxes, img_path = self.get_data(idx)
 
 
 
 
148
  img, bboxes, rev_tensor = self.transform(img, bboxes)
149
  bboxes[:, [1, 3]] *= self.image_size[0]
150
  bboxes[:, [2, 4]] *= self.image_size[1]
 
1
  from pathlib import Path
2
  from queue import Empty, Queue
3
+ from statistics import mean
4
  from threading import Event, Thread
5
  from typing import Generator, List, Tuple, Union
6
 
 
29
  augment_cfg = data_cfg.data_augment
30
  self.image_size = data_cfg.image_size
31
  phase_name = dataset_cfg.get(phase, phase)
32
+ self.batch_size = data_cfg.batch_size
33
+ self.dynamic_shape = getattr(data_cfg, "dynamic_shape", True)
34
+ self.base_size = mean(self.image_size)
35
 
36
  transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
37
  self.transform = AugmentationComposer(transforms, self.image_size)
38
  self.transform.get_more_data = self.get_more_data
39
+ self.img_paths, self.bboxes, self.ratios = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))
 
40
 
41
  def load_data(self, dataset_path: Path, phase_name: str):
42
  """
 
105
  labels = self.load_valid_labels(image_id, image_seg_annotations)
106
 
107
  img_path = images_path / image_name
108
+ with Image.open(img_path) as img:
109
+ width, height = img.size
110
+ data.append((img_path, labels, width / height))
111
  valid_inputs += 1
112
+
113
+ data = sorted(data, key=lambda x: x[2], reverse=True)
114
+
115
  logger.info(f"Recorded {valid_inputs}/{len(images_list)} valid inputs")
116
  return data
117
 
 
151
  indices = torch.randint(0, len(self), (num,))
152
  return [self.get_data(idx)[:2] for idx in indices]
153
 
154
+ def _update_image_size(self, idx: int) -> None:
155
+ """Update image size based on dynamic shape and batch settings."""
156
+ batch_start_idx = (idx // self.batch_size) * self.batch_size
157
+ image_ratio = self.ratios[batch_start_idx]
158
+
159
+ shift = ((self.base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
160
+
161
+ self.image_size = [int(self.base_size + shift), int(self.base_size - shift)]
162
+ self.transform.pad_resize.set_size(self.image_size)
163
+
164
  def __getitem__(self, idx) -> Tuple[Image.Image, Tensor, Tensor, List[str]]:
165
  img, bboxes, img_path = self.get_data(idx)
166
+
167
+ if self.dynamic_shape:
168
+ self._update_image_size(idx)
169
+
170
  img, bboxes, rev_tensor = self.transform(img, bboxes)
171
  bboxes[:, [1, 3]] *= self.image_size[0]
172
  bboxes[:, [2, 4]] *= self.image_size[1]
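
To make the _update_image_size arithmetic concrete: every sample looks up the ratio of the first image in its batch (the widest one, since load_data now sorts by width/height descending), and both target sides are shifted away from the mean base size in multiples of 32 so they stay stride-aligned. A self-contained sketch of the same formula (the function name and example ratios are illustrative):

# Standalone re-statement of the shift formula used in _update_image_size.
from statistics import mean

def dynamic_size(image_size, image_ratio):
    base_size = mean(image_size)  # 640 for [640, 640]
    # Floor-division keeps the shift a whole multiple of 32 (the largest stride).
    shift = ((base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
    return [int(base_size + shift), int(base_size - shift)]

print(dynamic_size([640, 640], 1.0))    # [640, 640]  square batch keeps the base size
print(dynamic_size([640, 640], 4 / 3))  # [704, 576]  wide batch: first side grows
print(dynamic_size([640, 640], 0.75))   # [544, 736]  tall batch: first side shrinks

Because all samples in a batch share that leading ratio, every item in the batch is padded and resized to the same rectangle, so default collation still works.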
yolo/tools/solver.py CHANGED
@@ -45,7 +45,7 @@ class ValidateModel(BaseModel):
 
     def validation_step(self, batch, batch_idx):
         batch_size, images, targets, rev_tensor, img_paths = batch
-        predicts = self.post_process(self(images))
+        predicts = self.post_process(self(images), image_size=images.shape[2:])
         batch_metrics = self.metric(
             [to_metrics_format(predict) for predict in predicts], [to_metrics_format(target) for target in targets]
         )
@@ -127,7 +127,7 @@ class InferenceModel(BaseModel):
 
     def predict_step(self, batch, batch_idx):
         images, rev_tensor, origin_frame = batch
-        predicts = self.post_process(self(images), rev_tensor)
+        predicts = self.post_process(self(images), rev_tensor=rev_tensor)
         img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list)
         if getattr(self.predict_loader, "is_stream", None):
            fps = self._display_stream(img)
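
The validation step now forwards images.shape[2:] as image_size; for an NCHW batch those are the (height, width) dimensions, matching the H, W order the reworked generate_anchors expects. A tiny sketch:

# Tiny sketch: the trailing two dims of an NCHW tensor are (H, W).
import torch

images = torch.zeros(16, 3, 576, 704)  # batch, channels, height, width
print(tuple(images.shape[2:]))         # (576, 704)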
yolo/utils/bounding_box_utils.py CHANGED
@@ -122,7 +122,7 @@ def generate_anchors(image_size: List[int], strides: List[int]):
         all_anchors [HW x 2]:
         all_scalers [HW]: The index of the best targets for each anchors
     """
-    W, H = image_size
+    H, W = image_size
     anchors = []
     scaler = []
     for stride in strides:
@@ -308,6 +308,7 @@ class Vec2Box:
             self.strides = self.create_auto_anchor(model, image_size)
 
         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(device), scaler.to(device)
 
     def create_auto_anchor(self, model: YOLO, image_size):
@@ -320,7 +321,13 @@ class Vec2Box:
         return strides
 
     def update(self, image_size):
+        """
+        image_size: H, W
+        """
+        if self.image_size == image_size:
+            return
         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(self.device), scaler.to(self.device)
 
     def __call__(self, predicts):
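
As a sanity check on the H, W unpacking fix: the number of anchor centres produced per stride is (H // s) * (W // s), so the ordering only matters once the two sides differ, which is exactly what dynamic shapes introduce. An illustrative counter (not the project's generate_anchors; the strides are the usual YOLO values and an assumption here):

# Illustrative anchor-count helper, not the real generate_anchors.
def anchor_count(image_size, strides=(8, 16, 32)):
    H, W = image_size
    return sum((H // s) * (W // s) for s in strides)

print(anchor_count([640, 640]))  # 8400 anchors for the square base size
print(anchor_count([576, 704]))  # 8316 anchors for a wide dynamic batch

Caching image_size in Vec2Box means update only regenerates the grid when the incoming size actually differs, so same-shaped consecutive batches pay no extra cost.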
yolo/utils/dataset_utils.py CHANGED
@@ -115,7 +115,7 @@ def scale_segmentation(
 
 
 def tensorlize(data):
-    img_paths, bboxes = zip(*data)
+    img_paths, bboxes, img_ratios = zip(*data)
     max_box = max(bbox.size(0) for bbox in bboxes)
     padded_bbox_list = []
     for bbox in bboxes:
@@ -124,4 +124,5 @@ def tensorlize(data):
         padded_bbox_list.append(padding)
     bboxes = np.stack(padded_bbox_list)
     img_paths = np.array(img_paths)
-    return img_paths, bboxes
+    img_ratios = np.array(img_ratios)
+    return img_paths, bboxes, img_ratios
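
A compact sketch of the extended contract (the helper name and toy data are illustrative): each record is now (path, boxes, width/height ratio), boxes are zero-padded to the longest sample, and the ratios come back as an array aligned with img_paths so the loader can index them per batch:

# Self-contained sketch of the three-field tensorlize behaviour.
import numpy as np
import torch

def tensorlize_sketch(data):
    img_paths, bboxes, img_ratios = zip(*data)
    max_box = max(bbox.size(0) for bbox in bboxes)
    padded = [torch.cat([b, torch.zeros(max_box - b.size(0), 5)]) for b in bboxes]
    return np.array(img_paths), np.stack([p.numpy() for p in padded]), np.array(img_ratios)

data = [("a.jpg", torch.zeros(2, 5), 4 / 3), ("b.jpg", torch.zeros(1, 5), 0.75)]
paths, boxes, ratios = tensorlize_sketch(data)
print(boxes.shape)  # (2, 2, 5): both samples padded to the longest (2 boxes)
print(ratios)       # aspect ratios in the same order as paths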
yolo/utils/model_utils.py CHANGED
@@ -11,7 +11,7 @@ from torch.optim.lr_scheduler import LambdaLR, SequentialLR, _LRScheduler
 
 from yolo.config.config import IDX_TO_ID, NMSConfig, OptimizerConfig, SchedulerConfig
 from yolo.model.yolo import YOLO
-from yolo.utils.bounding_box_utils import bbox_nms, transform_bbox
+from yolo.utils.bounding_box_utils import Anc2Box, Vec2Box, bbox_nms, transform_bbox
 from yolo.utils.logger import logger
 
 
@@ -130,11 +130,15 @@ class PostProcess:
     scale back the prediction and do nms for pred_bbox
     """
 
-    def __init__(self, converter, nms_cfg: NMSConfig) -> None:
+    def __init__(self, converter: Union[Vec2Box, Anc2Box], nms_cfg: NMSConfig) -> None:
         self.converter = converter
         self.nms = nms_cfg
 
-    def __call__(self, predict, rev_tensor: Optional[Tensor] = None) -> List[Tensor]:
+    def __call__(
+        self, predict, rev_tensor: Optional[Tensor] = None, image_size: Optional[List[int]] = None
+    ) -> List[Tensor]:
+        if image_size is not None:
+            self.converter.update(image_size)
         prediction = self.converter(predict["Main"])
         pred_class, _, pred_bbox = prediction[:3]
         pred_conf = prediction[3] if len(prediction) == 4 else None
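
Finally, a self-contained sketch of the extended call contract (the stub converter and class name are placeholders): anchors are only rebuilt when a caller supplies image_size, as the validation step now does, while the inference path keeps passing rev_tensor and leaves the converter untouched.

# Stub-based sketch of the optional image_size plumbing in PostProcess.__call__.
from typing import List, Optional

class StubConverter:
    def __init__(self):
        self.image_size = (640, 640)

    def update(self, image_size):
        if tuple(image_size) != tuple(self.image_size):
            self.image_size = tuple(image_size)
            print(f"anchors rebuilt for {self.image_size}")

class PostProcessSketch:
    def __init__(self, converter):
        self.converter = converter

    def __call__(self, predict, rev_tensor=None, image_size: Optional[List[int]] = None):
        if image_size is not None:           # same guard as in the diff
            self.converter.update(image_size)
        return predict                       # NMS and rescaling omitted in this sketch

post = PostProcessSketch(StubConverter())
post({"Main": []}, image_size=[576, 704])  # validation-style call: prints a rebuild message
post({"Main": []}, rev_tensor=None)        # inference-style call: converter untouched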