✨ [Add] dynamic image size loader
- yolo/config/config.py +1 -0
- yolo/config/task/validation.yaml +2 -1
- yolo/tools/data_augmentation.py +6 -2
- yolo/tools/data_loader.py +25 -3
- yolo/tools/solver.py +2 -2
- yolo/utils/bounding_box_utils.py +8 -1
- yolo/utils/dataset_utils.py +3 -2
- yolo/utils/model_utils.py +7 -3
yolo/config/config.py

```diff
@@ -59,6 +59,7 @@ class DataConfig:
     image_size: List[int]
     data_augment: Dict[str, int]
     source: Optional[Union[str, int]]
+    dynamic_shape: Optional[bool]


 @dataclass
```
yolo/config/task/validation.yaml

```diff
@@ -1,12 +1,13 @@
 task: validation

 data:
-  batch_size:
+  batch_size: 32
   image_size: ${image_size}
   cpu_num: ${cpu_num}
   shuffle: False
   pin_memory: True
   data_augment: {}
+  dynamic_shape: True
 nms:
   min_confidence: 0.0001
   min_iou: 0.7
```
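The `${image_size}` and `${cpu_num}` entries are interpolations resolved against the top-level config. A standalone sketch of that resolution using OmegaConf directly (the repo resolves these through Hydra; the surrounding top-level values here are made up for illustration):

```python
from omegaconf import OmegaConf

# Hypothetical root config; in the repo, Hydra composes this from several files.
root = OmegaConf.create(
    {
        "image_size": [640, 640],
        "cpu_num": 4,
        "task": {
            "data": {
                "batch_size": 32,
                "image_size": "${image_size}",
                "cpu_num": "${cpu_num}",
                "shuffle": False,
                "pin_memory": True,
                "data_augment": {},
                "dynamic_shape": True,
            }
        },
    }
)

print(root.task.data.image_size)     # [640, 640], resolved from the root
print(root.task.data.dynamic_shape)  # True
```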
yolo/tools/data_augmentation.py

```diff
@@ -1,3 +1,5 @@
+from typing import List
+
 import numpy as np
 import torch
 from PIL import Image
@@ -10,8 +12,7 @@ class AugmentationComposer:
     def __init__(self, transforms, image_size: int = [640, 640]):
         self.transforms = transforms
         # TODO: handle List of image_size [640, 640]
-        self.image_size = image_size
-        self.pad_resize = PadAndResize(self.image_size)
+        self.pad_resize = PadAndResize(image_size)

         for transform in self.transforms:
             if hasattr(transform, "set_parent"):
@@ -57,6 +58,9 @@ class PadAndResize:
         self.target_width, self.target_height = image_size
         self.background_color = background_color

+    def set_size(self, image_size: List[int]):
+        self.target_width, self.target_height = image_size
+
     def __call__(self, image: Image, boxes):
         img_width, img_height = image.size
         scale = min(self.target_width / img_width, self.target_height / img_height)
```
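The new `set_size` hook lets the dataset re-target the letterbox transform per batch without rebuilding the composer. Below is a minimal, self-contained sketch of that pattern; the class here is a simplified stand-in (box handling, the reverse tensor, and the real background color are omitted), not the repo's full `PadAndResize`:

```python
from typing import List

from PIL import Image


class PadAndResize:
    """Letterbox-style resize: scale to fit, then pad onto a fixed canvas."""

    def __init__(self, image_size: List[int], background_color=(114, 114, 114)):
        self.target_width, self.target_height = image_size
        self.background_color = background_color

    def set_size(self, image_size: List[int]) -> None:
        # Re-target the transform in place; callers keep the same object.
        self.target_width, self.target_height = image_size

    def __call__(self, image: Image.Image) -> Image.Image:
        img_width, img_height = image.size
        scale = min(self.target_width / img_width, self.target_height / img_height)
        new_w, new_h = int(img_width * scale), int(img_height * scale)
        resized = image.resize((new_w, new_h))
        canvas = Image.new("RGB", (self.target_width, self.target_height), self.background_color)
        canvas.paste(resized, ((self.target_width - new_w) // 2, (self.target_height - new_h) // 2))
        return canvas


pad_resize = PadAndResize([640, 640])
pad_resize.set_size([672, 608])               # e.g. chosen per batch by the dataset
out = pad_resize(Image.new("RGB", (1280, 720)))
assert out.size == (672, 608)
```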
yolo/tools/data_loader.py

```diff
@@ -1,5 +1,6 @@
 from pathlib import Path
 from queue import Empty, Queue
+from statistics import mean
 from threading import Event, Thread
 from typing import Generator, List, Tuple, Union

@@ -28,12 +29,14 @@ class YoloDataset(Dataset):
         augment_cfg = data_cfg.data_augment
         self.image_size = data_cfg.image_size
         phase_name = dataset_cfg.get(phase, phase)
+        self.batch_size = data_cfg.batch_size
+        self.dynamic_shape = getattr(data_cfg, "dynamic_shape", True)
+        self.base_size = mean(self.image_size)

         transforms = [eval(aug)(prob) for aug, prob in augment_cfg.items()]
         self.transform = AugmentationComposer(transforms, self.image_size)
         self.transform.get_more_data = self.get_more_data
-        img_paths, bboxes = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))
-        self.img_paths, self.bboxes = img_paths, bboxes
+        self.img_paths, self.bboxes, self.ratios = tensorlize(self.load_data(Path(dataset_cfg.path), phase_name))

     def load_data(self, dataset_path: Path, phase_name: str):
         """
@@ -102,8 +105,13 @@ class YoloDataset(Dataset):
             labels = self.load_valid_labels(image_id, image_seg_annotations)

             img_path = images_path / image_name
-            data.append((img_path, labels))
+            with Image.open(img_path) as img:
+                width, height = img.size
+                data.append((img_path, labels, width / height))
             valid_inputs += 1
+
+        data = sorted(data, key=lambda x: x[2], reverse=True)
+
         logger.info(f"Recorded {valid_inputs}/{len(images_list)} valid inputs")
         return data

@@ -143,8 +151,22 @@ class YoloDataset(Dataset):
         indices = torch.randint(0, len(self), (num,))
         return [self.get_data(idx)[:2] for idx in indices]

+    def _update_image_size(self, idx: int) -> None:
+        """Update image size based on dynamic shape and batch settings."""
+        batch_start_idx = (idx // self.batch_size) * self.batch_size
+        image_ratio = self.ratios[batch_start_idx]
+
+        shift = ((self.base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
+
+        self.image_size = [int(self.base_size + shift), int(self.base_size - shift)]
+        self.transform.pad_resize.set_size(self.image_size)
+
     def __getitem__(self, idx) -> Tuple[Image.Image, Tensor, Tensor, List[str]]:
         img, bboxes, img_path = self.get_data(idx)
+
+        if self.dynamic_shape:
+            self._update_image_size(idx)
+
         img, bboxes, rev_tensor = self.transform(img, bboxes)
         bboxes[:, [1, 3]] *= self.image_size[0]
         bboxes[:, [2, 4]] *= self.image_size[1]
```
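To see what the shift arithmetic in `_update_image_size` produces, here is the same formula evaluated standalone for a few aspect ratios, assuming the default `base_size` of 640 (the mean of `[640, 640]`). Note that floor division makes the shift for tall images (ratio < 1) round away from zero, so tall and wide inputs are not treated perfectly symmetrically:

```python
from statistics import mean

base_size = mean([640, 640])  # 640


def batch_image_size(image_ratio: float, base_size: float = base_size) -> list:
    # Same arithmetic as YoloDataset._update_image_size: the shift snaps to a multiple of 32.
    shift = ((base_size / 32 * (image_ratio - 1)) // (image_ratio + 1)) * 32
    return [int(base_size + shift), int(base_size - shift)]


print(batch_image_size(16 / 9))  # wide lead image  -> [800, 480]
print(batch_image_size(1.0))     # square           -> [640, 640]
print(batch_image_size(9 / 16))  # tall lead image  -> [448, 832]
```

Because `load_data` now sorts entries by ratio in descending order, the ratio at a batch's first index belongs to the widest image in that batch, so the whole batch is resized toward that reference shape.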
yolo/tools/solver.py

```diff
@@ -45,7 +45,7 @@ class ValidateModel(BaseModel):

     def validation_step(self, batch, batch_idx):
         batch_size, images, targets, rev_tensor, img_paths = batch
-        predicts = self.post_process(self(images))
+        predicts = self.post_process(self(images), image_size=images.shape[2:])
         batch_metrics = self.metric(
             [to_metrics_format(predict) for predict in predicts], [to_metrics_format(target) for target in targets]
         )
@@ -127,7 +127,7 @@ class InferenceModel(BaseModel):

     def predict_step(self, batch, batch_idx):
         images, rev_tensor, origin_frame = batch
-        predicts = self.post_process(self(images), rev_tensor)
+        predicts = self.post_process(self(images), rev_tensor=rev_tensor)
         img = draw_bboxes(origin_frame, predicts, idx2label=self.cfg.dataset.class_list)
         if getattr(self.predict_loader, "is_stream", None):
             fps = self._display_stream(img)
```
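For reference, `images.shape[2:]` on an NCHW batch is the `(height, width)` pair the loader actually produced, which is what the converter needs in order to rebuild its anchor grid. A tiny illustration with a dummy tensor; the commented lines mirror the solver calls above:

```python
import torch

# A batch as produced by the dynamic-shape loader (NCHW); H and W vary per batch.
images = torch.zeros(8, 3, 480, 800)

image_size = images.shape[2:]   # torch.Size([480, 800]), i.e. (H, W)
print(tuple(image_size))        # (480, 800)

# Validation forwards the per-batch shape:
#   predicts = self.post_process(self(images), image_size=images.shape[2:])
# Inference keeps its original-resolution mapping via the reverse tensor:
#   predicts = self.post_process(self(images), rev_tensor=rev_tensor)
```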
yolo/utils/bounding_box_utils.py

```diff
@@ -122,7 +122,7 @@ def generate_anchors(image_size: List[int], strides: List[int]):
         all_anchors [HW x 2]:
         all_scalers [HW]: The index of the best targets for each anchors
     """
-    W, H = image_size
+    H, W = image_size
     anchors = []
     scaler = []
     for stride in strides:
@@ -308,6 +308,7 @@ class Vec2Box:
         self.strides = self.create_auto_anchor(model, image_size)

         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(device), scaler.to(device)

     def create_auto_anchor(self, model: YOLO, image_size):
@@ -320,7 +321,13 @@ class Vec2Box:
         return strides

     def update(self, image_size):
+        """
+        image_size: H, W
+        """
+        if self.image_size == image_size:
+            return
         anchor_grid, scaler = generate_anchors(image_size, self.strides)
+        self.image_size = image_size
         self.anchor_grid, self.scaler = anchor_grid.to(self.device), scaler.to(self.device)

     def __call__(self, predicts):
```
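The early return in `update` turns per-batch calls into cheap no-ops whenever consecutive batches share a shape. A minimal sketch of that caching pattern follows; `make_anchors` and `AnchorCache` are simplified stand-ins for `generate_anchors` and `Vec2Box`, counting anchor cells instead of building the real grid and scaler tensors:

```python
from typing import List


def make_anchors(image_size: List[int], strides: List[int]) -> int:
    # Stand-in for generate_anchors: just count anchor cells per (H, W).
    H, W = image_size
    return sum((H // s) * (W // s) for s in strides)


class AnchorCache:
    """Regenerate anchors only when the image size changes (same idea as Vec2Box.update)."""

    def __init__(self, image_size: List[int], strides: List[int]):
        self.strides = strides
        self.image_size = image_size
        self.num_anchors = make_anchors(image_size, strides)

    def update(self, image_size: List[int]) -> None:
        if self.image_size == image_size:
            return  # cached grid is still valid
        self.num_anchors = make_anchors(image_size, self.strides)
        self.image_size = image_size


cache = AnchorCache([640, 640], strides=[8, 16, 32])
cache.update([640, 640])   # no-op, same size
cache.update([480, 800])   # regenerates for the new (H, W)
print(cache.num_anchors)   # 6000 + 1500 + 375 = 7875
```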
yolo/utils/dataset_utils.py

```diff
@@ -115,7 +115,7 @@ def scale_segmentation(


 def tensorlize(data):
-    img_paths, bboxes = zip(*data)
+    img_paths, bboxes, img_ratios = zip(*data)
     max_box = max(bbox.size(0) for bbox in bboxes)
     padded_bbox_list = []
     for bbox in bboxes:
@@ -124,4 +124,5 @@ def tensorlize(data):
         padded_bbox_list.append(padding)
     bboxes = np.stack(padded_bbox_list)
     img_paths = np.array(img_paths)
-    return img_paths, bboxes
+    img_ratios = np.array(img_ratios)
+    return img_paths, bboxes, img_ratios
```
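`tensorlize` now threads the per-image width/height ratio through alongside paths and padded boxes, so the loader can index a batch's reference ratio directly. The toy version below shows the shape of the inputs and outputs; the -1 padding value and the `(max_box, 5)` layout are assumptions for illustration, not necessarily the repo's exact padding scheme:

```python
import numpy as np
import torch


def tensorlize(data):
    # Pad per-image box tensors to a common length and stack, keeping paths and ratios aligned.
    img_paths, bboxes, img_ratios = zip(*data)
    max_box = max(bbox.size(0) for bbox in bboxes)
    padded_bbox_list = []
    for bbox in bboxes:
        padding = torch.full((max_box, 5), -1, dtype=torch.float32)
        padding[: bbox.size(0)] = bbox
        padded_bbox_list.append(padding)
    bboxes = np.stack(padded_bbox_list)
    img_paths = np.array(img_paths)
    img_ratios = np.array(img_ratios)
    return img_paths, bboxes, img_ratios


data = [
    ("a.jpg", torch.zeros(2, 5), 16 / 9),  # rows are [class, x1, y1, x2, y2]
    ("b.jpg", torch.zeros(1, 5), 3 / 4),
]
paths, boxes, ratios = tensorlize(data)
print(boxes.shape, ratios)  # (2, 2, 5) [1.7777778 0.75]
```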
yolo/utils/model_utils.py

```diff
@@ -11,7 +11,7 @@ from torch.optim.lr_scheduler import LambdaLR, SequentialLR, _LRScheduler

 from yolo.config.config import IDX_TO_ID, NMSConfig, OptimizerConfig, SchedulerConfig
 from yolo.model.yolo import YOLO
-from yolo.utils.bounding_box_utils import bbox_nms, transform_bbox
+from yolo.utils.bounding_box_utils import Anc2Box, Vec2Box, bbox_nms, transform_bbox
 from yolo.utils.logger import logger


@@ -130,11 +130,15 @@ class PostProcess:
     scale back the prediction and do nms for pred_bbox
     """

-    def __init__(self, converter, nms_cfg: NMSConfig) -> None:
+    def __init__(self, converter: Union[Vec2Box, Anc2Box], nms_cfg: NMSConfig) -> None:
         self.converter = converter
         self.nms = nms_cfg

-    def __call__(self, predict, rev_tensor: Optional[Tensor] = None) -> List[Tensor]:
+    def __call__(
+        self, predict, rev_tensor: Optional[Tensor] = None, image_size: Optional[List[int]] = None
+    ) -> List[Tensor]:
+        if image_size is not None:
+            self.converter.update(image_size)
         prediction = self.converter(predict["Main"])
         pred_class, _, pred_bbox = prediction[:3]
         pred_conf = prediction[3] if len(prediction) == 4 else None
```
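With the widened signature, validation can pass the batch's spatial size while inference keeps passing only `rev_tensor`; the converter is updated only when a size is supplied. A minimal sketch of that dispatch, using a stub converter rather than the real `Vec2Box`/`Anc2Box` and omitting NMS and rescaling:

```python
from typing import List, Optional

from torch import Tensor


class StubConverter:
    """Stands in for Vec2Box/Anc2Box: records the last image size it was updated to."""

    def __init__(self, image_size):
        self.image_size = image_size

    def update(self, image_size) -> None:
        if self.image_size == image_size:
            return  # same shape, keep the cached anchors
        self.image_size = image_size
        print(f"regenerating anchors for {tuple(image_size)}")


class PostProcessSketch:
    def __init__(self, converter) -> None:
        self.converter = converter

    def __call__(self, predict, rev_tensor: Optional[Tensor] = None, image_size: Optional[List[int]] = None):
        # Validation passes image_size=images.shape[2:]; inference passes rev_tensor instead.
        if image_size is not None:
            self.converter.update(image_size)
        return predict  # NMS and coordinate rescaling omitted in this sketch


post_process = PostProcessSketch(StubConverter([640, 640]))
post_process({"Main": None}, image_size=[480, 800])  # triggers an anchor rebuild
post_process({"Main": None}, image_size=[480, 800])  # same size: cached, no rebuild
```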