henry000 committed
Commit aba5422 · 1 Parent(s): e78c98b

✨ [Add] dynamic image size loader

yolo/config/config.py CHANGED
@@ -59,6 +59,7 @@ class DataConfig:
     image_size: List[int]
     data_augment: Dict[str, int]
     source: Optional[Union[str, int]]
+    dynamic_shape: Optional[bool]
 
 
 @dataclass
yolo/config/task/validation.yaml CHANGED
@@ -1,12 +1,13 @@
 task: validation
 
 data:
-  batch_size: 16
+  batch_size: 32
   image_size: ${image_size}
   cpu_num: ${cpu_num}
   shuffle: False
   pin_memory: True
   data_augment: {}
+  dynamic_shape: True
 nms:
   min_confidence: 0.0001
   min_iou: 0.7
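
Note: dynamic_shape is declared Optional in DataConfig, so existing configs that omit it still load. A minimal illustrative sketch (the reduced dataclass and its default value below are assumptions, not code from this commit) of reading the flag with a fallback, mirroring the getattr guard added in the data loader below:

# Illustrative sketch only: a reduced stand-in for DataConfig with the new flag.
from dataclasses import dataclass
from typing import Dict, List, Optional

@dataclass
class DataConfigSketch:
    image_size: List[int]
    data_augment: Dict[str, int]
    dynamic_shape: Optional[bool] = None  # default exists only in this sketch

cfg = DataConfigSketch(image_size=[640, 640], data_augment={}, dynamic_shape=True)
use_dynamic = getattr(cfg, "dynamic_shape", True)  # falls back to True if the field is absent
print(use_dynamic)  # True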
yolo/tools/data_augmentation.py CHANGED
@@ -1,3 +1,5 @@
+from typing import List
+
 import numpy as np
 import torch
 from PIL import Image
@@ -10,8 +12,7 @@ class AugmentationComposer:
     def __init__(self, transforms, image_size: int = [640, 640]):
         self.transforms = transforms
         # TODO: handle List of image_size [640, 640]
-        self.image_size = image_size
-        self.pad_resize = PadAndResize(self.image_size)
+        self.pad_resize = PadAndResize(image_size)
 
         for transform in self.transforms:
             if hasattr(transform, "set_parent"):
@@ -57,6 +58,9 @@ class PadAndResize:
         self.target_width, self.target_height = image_size
         self.background_color = background_color
 
+    def set_size(self, image_size: List[int]):
+        self.target_width, self.target_height = image_size
+
     def __call__(self, image: Image, boxes):
         img_width, img_height = image.size
         scale = min(self.target_width / img_width, self.target_height / img_height)
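
The point of set_size is that the composer's single PadAndResize instance can be retargeted per batch instead of being rebuilt. A trimmed, self-contained sketch of that behaviour (the resize/paste body, the missing boxes argument, and the background colour are simplified placeholders; only the set_size mechanics mirror the diff):

# Trimmed sketch of PadAndResize retargeting; not the project's full transform.
from PIL import Image

class PadAndResizeSketch:
    def __init__(self, image_size, background_color=(114, 114, 114)):
        self.target_width, self.target_height = image_size
        self.background_color = background_color

    def set_size(self, image_size):
        # Same one-liner as the new method in the diff.
        self.target_width, self.target_height = image_size

    def __call__(self, image):
        img_width, img_height = image.size
        scale = min(self.target_width / img_width, self.target_height / img_height)
        resized = image.resize((int(img_width * scale), int(img_height * scale)))
        canvas = Image.new("RGB", (self.target_width, self.target_height), self.background_color)
        canvas.paste(resized, (0, 0))
        return canvas

pad = PadAndResizeSketch([640, 640])
pad.set_size([704, 576])                       # retarget for a wide batch
print(pad(Image.new("RGB", (800, 600))).size)  # (704, 576)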
yolo/tools/data_loader.py CHANGED
@@ -1,5 +1,6 @@
1
  from pathlib import Path
2
  from queue import Empty, Queue
 
3
  from threading import Event, Thread
4
  from typing import Generator, List, Tuple, Union
5
 
@@ -28,12 +29,14 @@ class YoloDataset(Dataset):
28
  augment_cfg = data_cfg.data_augment
29
  self.image_size = data_cfg.image_size
30
  phase_name = dataset_cfg.get(phase, phase)
 
 
 
31
 
32
  transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
33
  self.transform = AugmentationComposer(transforms, self.image_size)
34
  self.transform.get_more_data = self.get_more_data
35
- img_paths, bboxes = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))
36
- self.img_paths, self.bboxes = img_paths, bboxes
37
 
38
  def load_data(self, dataset_path: Path, phase_name: str):
39
  """
@@ -102,8 +105,13 @@ class YoloDataset(Dataset):
102
  labels = self.load_valid_labels(image_id, image_seg_annotations)
103
 
104
  img_path = images_path / image_name
105
- data.append((img_path, labels))
 
 
106
  valid_inputs += 1
 
 
 
107
  logger.info(f"Recorded {valid_inputs}/{len(images_list)} valid inputs")
108
  return data
109
 
@@ -143,8 +151,22 @@ class YoloDataset(Dataset):
143
  indices = torch.randint(0, len(self), (num,))
144
  return [self.get_data(idx)[:2] for idx in indices]
145
 
 
 
 
 
 
 
 
 
 
 
146
  def __getitem__(self, idx) -> Tuple[Image.Image, Tensor, Tensor, List[str]]:
147
  img, bboxes, img_path = self.get_data(idx)
 
 
 
 
148
  img, bboxes, rev_tensor = self.transform(img, bboxes)
149
  bboxes[:, [1, 3]] *= self.image_size[0]
150
  bboxes[:, [2, 4]] *= self.image_size[1]
 
1
  from pathlib import Path
2
  from queue import Empty, Queue
3
+ from statistics import mean
4
  from threading import Event, Thread
5
  from typing import Generator, List, Tuple, Union
6
 
 
29
  augment_cfg = data_cfg.data_augment
30
  self.image_size = data_cfg.image_size
31
  phase_name = dataset_cfg.get(phase, phase)
32
+ self.batch_size = data_cfg.batch_size
33
+ self.dynamic_shape = getattr(data_cfg, "dynamic_shape", True)
34
+ self.base_size = mean(self.image_size)
35
 
36
  transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
37
  self.transform = AugmentationComposer(transforms, self.image_size)
38
  self.transform.get_more_data = self.get_more_data
39
+ self.img_paths, self.bboxes, self.ratios = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))
 
40
 
41
  def load_data(self, dataset_path: Path, phase_name: str):
42
  """
 
105
  labels = self.load_valid_labels(image_id, image_seg_annotations)
106
 
107
  img_path = images_path / image_name
108
+ with Image.open(img_path) as img:
109
+ width, height = img.size
110
+ data.append((img_path, labels, width / height))
111
  valid_inputs += 1
112
+
113
+ data = sorted(data, key=lambda x: x[2], reverse=True)
114
+
115
  logger.info(f"Recorded {valid_inputs}/{len(images_list)} valid inputs")
116
  return data
117
 
 
151
  indices = torch.randint(0, len(self), (num,))
152
  return [self.get_data(idx)[:2] for idx in indices]
153
 
154
+ def _update_image_size(self, idx: int) -> None:
155
+ """Update image size based on dynamic shape and batch settings."""
156
+ batch_start_idx = (idx // self.batch_size) * self.batch_size
157
+ image_ratio = self.ratios[batch_start_idx]
158
+
159
+ shift = ((self.base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
160
+
161
+ self.image_size = [int(self.base_size + shift), int(self.base_size - shift)]
162
+ self.transform.pad_resize.set_size(self.image_size)
163
+
164
  def __getitem__(self, idx) -> Tuple[Image.Image, Tensor, Tensor, List[str]]:
165
  img, bboxes, img_path = self.get_data(idx)
166
+
167
+ if self.dynamic_shape:
168
+ self._update_image_size(idx)
169
+
170
  img, bboxes, rev_tensor = self.transform(img, bboxes)
171
  bboxes[:, [1, 3]] *= self.image_size[0]
172
  bboxes[:, [2, 4]] *= self.image_size[1]
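
To make the _update_image_size arithmetic concrete: every sample looks up the ratio of the first image in its batch (the widest one, since load_data now sorts by width/height descending), and both target sides are shifted away from the mean base size in multiples of 32 so they stay stride-aligned. A self-contained sketch of the same formula (the function name and example ratios are illustrative):

# Standalone re-statement of the shift formula used in _update_image_size.
from statistics import mean

def dynamic_size(image_size, image_ratio):
    base_size = mean(image_size)  # 640 for [640, 640]
    # Floor-division keeps the shift a whole multiple of 32 (the largest stride).
    shift = ((base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
    return [int(base_size + shift), int(base_size - shift)]

print(dynamic_size([640, 640], 1.0))    # [640, 640]  square batch keeps the base size
print(dynamic_size([640, 640], 4 / 3))  # [704, 576]  wide batch: first side grows
print(dynamic_size([640, 640], 0.75))   # [544, 736]  tall batch: first side shrinks

Because all samples in a batch share that leading ratio, every item in the batch is padded and resized to the same rectangle, so default collation still works.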
yolo/tools/solver.py CHANGED
@@ -45,7 +45,7 @@ class ValidateModel(BaseModel):
 
     def validation_step(self, batch, batch_idx):
         batch_size, images, targets, rev_tensor, img_paths = batch
-        predicts = self.post_process(self(images))
+        predicts = self.post_process(self(images), image_size=images.shape[2:])
         batch_metrics = self.metric(
             [to_metrics_format(predict) for predict in predicts], [to_metrics_format(target) for target in targets]
         )
@@ -127,7 +127,7 @@ class InferenceModel(BaseModel):
 
     def predict_step(self, batch, batch_idx):
         images, rev_tensor, origin_frame = batch
-        predicts = self.post_process(self(images), rev_tensor)
+        predicts = self.post_process(self(images), rev_tensor=rev_tensor)
         img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list)
         if getattr(self.predict_loader, "is_stream", None):
            fps = self._display_stream(img)
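
The validation step now forwards images.shape[2:] as image_size; for an NCHW batch those are the (height, width) dimensions, matching the H, W order the reworked generate_anchors expects. A tiny sketch:

# Tiny sketch: the trailing two dims of an NCHW tensor are (H, W).
import torch

images = torch.zeros(16, 3, 576, 704)  # batch, channels, height, width
print(tuple(images.shape[2:]))         # (576, 704)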
yolo/utils/bounding_box_utils.py CHANGED
@@ -122,7 +122,7 @@ def generate_anchors(image_size: List[int], strides: List[int]):
         all_anchors [HW x 2]:
         all_scalers [HW]: The index of the best targets for each anchors
     """
-    W, H = image_size
+    H, W = image_size
     anchors = []
     scaler = []
     for stride in strides:
@@ -308,6 +308,7 @@ class Vec2Box:
             self.strides = self.create_auto_anchor(model, image_size)
 
         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(device), scaler.to(device)
 
     def create_auto_anchor(self, model: YOLO, image_size):
@@ -320,7 +321,13 @@ class Vec2Box:
         return strides
 
     def update(self, image_size):
+        """
+        image_size: H, W
+        """
+        if self.image_size == image_size:
+            return
         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(self.device), scaler.to(self.device)
 
     def __call__(self, predicts):
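
As a sanity check on the H, W unpacking fix: the number of anchor centres produced per stride is (H // s) * (W // s), so the ordering only matters once the two sides differ, which is exactly what dynamic shapes introduce. An illustrative counter (not the project's generate_anchors; the strides are the usual YOLO values and an assumption here):

# Illustrative anchor-count helper, not the real generate_anchors.
def anchor_count(image_size, strides=(8, 16, 32)):
    H, W = image_size
    return sum((H // s) * (W // s) for s in strides)

print(anchor_count([640, 640]))  # 8400 anchors for the square base size
print(anchor_count([576, 704]))  # 8316 anchors for a wide dynamic batch

Caching image_size in Vec2Box means update only regenerates the grid when the incoming size actually differs, so same-shaped consecutive batches pay no extra cost.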
yolo/utils/dataset_utils.py CHANGED
@@ -115,7 +115,7 @@ def scale_segmentation(
 
 
 def tensorlize(data):
-    img_paths, bboxes = zip(*data)
+    img_paths, bboxes, img_ratios = zip(*data)
     max_box = max(bbox.size(0) for bbox in bboxes)
     padded_bbox_list = []
     for bbox in bboxes:
@@ -124,4 +124,5 @@ def tensorlize(data):
         padded_bbox_list.append(padding)
     bboxes = np.stack(padded_bbox_list)
     img_paths = np.array(img_paths)
-    return img_paths, bboxes
+    img_ratios = np.array(img_ratios)
+    return img_paths, bboxes, img_ratios
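
A compact sketch of the extended contract (the helper name and toy data are illustrative): each record is now (path, boxes, width/height ratio), boxes are zero-padded to the longest sample, and the ratios come back as an array aligned with img_paths so the loader can index them per batch:

# Self-contained sketch of the three-field tensorlize behaviour.
import numpy as np
import torch

def tensorlize_sketch(data):
    img_paths, bboxes, img_ratios = zip(*data)
    max_box = max(bbox.size(0) for bbox in bboxes)
    padded = [torch.cat([b, torch.zeros(max_box - b.size(0), 5)]) for b in bboxes]
    return np.array(img_paths), np.stack([p.numpy() for p in padded]), np.array(img_ratios)

data = [("a.jpg", torch.zeros(2, 5), 4 / 3), ("b.jpg", torch.zeros(1, 5), 0.75)]
paths, boxes, ratios = tensorlize_sketch(data)
print(boxes.shape)  # (2, 2, 5): both samples padded to the longest (2 boxes)
print(ratios)       # aspect ratios in the same order as paths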
yolo/utils/model_utils.py CHANGED
@@ -11,7 +11,7 @@ from torch.optim.lr_scheduler import LambdaLR, SequentialLR, _LRScheduler
 
 from yolo.config.config import IDX_TO_ID, NMSConfig, OptimizerConfig, SchedulerConfig
 from yolo.model.yolo import YOLO
-from yolo.utils.bounding_box_utils import bbox_nms, transform_bbox
+from yolo.utils.bounding_box_utils import Anc2Box, Vec2Box, bbox_nms, transform_bbox
 from yolo.utils.logger import logger
 
 
@@ -130,11 +130,15 @@ class PostProcess:
     scale back the prediction and do nms for pred_bbox
     """
 
-    def __init__(self, converter, nms_cfg: NMSConfig) -> None:
+    def __init__(self, converter: Union[Vec2Box, Anc2Box], nms_cfg: NMSConfig) -> None:
         self.converter = converter
         self.nms = nms_cfg
 
-    def __call__(self, predict, rev_tensor: Optional[Tensor] = None) -> List[Tensor]:
+    def __call__(
+        self, predict, rev_tensor: Optional[Tensor] = None, image_size: Optional[List[int]] = None
+    ) -> List[Tensor]:
+        if image_size is not None:
+            self.converter.update(image_size)
         prediction = self.converter(predict["Main"])
         pred_class, _, pred_bbox = prediction[:3]
         pred_conf = prediction[3] if len(prediction) == 4 else None
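
Finally, a self-contained sketch of the extended call contract (the stub converter and class name are placeholders): anchors are only rebuilt when a caller supplies image_size, as the validation step now does, while the inference path keeps passing rev_tensor and leaves the converter untouched.

# Stub-based sketch of the optional image_size plumbing in PostProcess.__call__.
from typing import List, Optional

class StubConverter:
    def __init__(self):
        self.image_size = (640, 640)

    def update(self, image_size):
        if tuple(image_size) != tuple(self.image_size):
            self.image_size = tuple(image_size)
            print(f"anchors rebuilt for {self.image_size}")

class PostProcessSketch:
    def __init__(self, converter):
        self.converter = converter

    def __call__(self, predict, rev_tensor=None, image_size: Optional[List[int]] = None):
        if image_size is not None:           # same guard as in the diff
            self.converter.update(image_size)
        return predict                       # NMS and rescaling omitted in this sketch

post = PostProcessSketch(StubConverter())
post({"Main": []}, image_size=[576, 704])  # validation-style call: prints a rebuild message
post({"Main": []}, rev_tensor=None)        # inference-style call: converter untouched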