♻️ [Refactor] Some Module code and class order
Browse files- yolo/model/module.py +94 -105
yolo/model/module.py
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
from typing import Any, Dict, List, Optional, Tuple
|
2 |
|
3 |
import torch
|
|
|
4 |
from torch import Tensor, nn
|
5 |
from torch.nn.common_types import _size_2_t
|
6 |
|
7 |
from yolo.tools.module_helper import auto_pad, get_activation, round_up
|
8 |
|
9 |
|
|
|
10 |
class Conv(nn.Module):
|
11 |
"""A basic convolutional block that includes convolution, batch normalization, and activation."""
|
12 |
|
@@ -42,63 +44,7 @@ class Pool(nn.Module):
|
|
42 |
return self.pool(x)
|
43 |
|
44 |
|
45 |
-
|
46 |
-
"""Downsampling module combining average and max pooling with convolution for feature reduction."""
|
47 |
-
|
48 |
-
def __init__(self, in_channels: int, out_channels: int):
|
49 |
-
super().__init__()
|
50 |
-
half_in_channels = in_channels // 2
|
51 |
-
half_out_channels = out_channels // 2
|
52 |
-
mid_layer = {"kernel_size": 3, "stride": 2}
|
53 |
-
self.avg_pool = Pool("avg", kernel_size=2, stride=1)
|
54 |
-
self.conv1 = Conv(half_in_channels, half_out_channels, **mid_layer)
|
55 |
-
self.max_pool = Pool("max", **mid_layer)
|
56 |
-
self.conv2 = Conv(half_in_channels, half_out_channels, kernel_size=1)
|
57 |
-
|
58 |
-
def forward(self, x: Tensor) -> Tensor:
|
59 |
-
x = self.avg_pool(x)
|
60 |
-
x1, x2 = x.chunk(2, dim=1)
|
61 |
-
x1 = self.conv1(x1)
|
62 |
-
x2 = self.max_pool(x2)
|
63 |
-
x2 = self.conv2(x2)
|
64 |
-
return torch.cat((x1, x2), dim=1)
|
65 |
-
|
66 |
-
|
67 |
-
class CBLinear(nn.Module):
|
68 |
-
"""Convolutional block that outputs multiple feature maps split along the channel dimension."""
|
69 |
-
|
70 |
-
def __init__(self, in_channels: int, out_channels: int, kernel_size: int = 1, **kwargs):
|
71 |
-
super(CBLinear, self).__init__()
|
72 |
-
kwargs.setdefault("padding", auto_pad(kernel_size, **kwargs))
|
73 |
-
self.conv = nn.Conv2d(in_channels, sum(out_channels), kernel_size, **kwargs)
|
74 |
-
self.out_channels = out_channels
|
75 |
-
|
76 |
-
def forward(self, x: Tensor) -> Tuple[Tensor]:
|
77 |
-
x = self.conv(x)
|
78 |
-
return x.split(self.out_channels, dim=1)
|
79 |
-
|
80 |
-
|
81 |
-
class SPPELAN(nn.Module):
|
82 |
-
"""SPPELAN module comprising multiple pooling and convolution layers."""
|
83 |
-
|
84 |
-
def __init__(self, in_channels, out_channels, neck_channels=Optional[int]):
|
85 |
-
super(SPPELAN, self).__init__()
|
86 |
-
neck_channels = neck_channels or out_channels // 2
|
87 |
-
|
88 |
-
self.conv1 = Conv(in_channels, neck_channels, kernel_size=1)
|
89 |
-
self.pools = nn.ModuleList([Pool("max", 5, stride=1) for _ in range(3)])
|
90 |
-
self.conv5 = Conv(4 * neck_channels, out_channels, kernel_size=1)
|
91 |
-
|
92 |
-
def forward(self, x: Tensor) -> Tensor:
|
93 |
-
features = [self.conv1(x)]
|
94 |
-
for pool in self.pools:
|
95 |
-
features.append(pool(features[-1]))
|
96 |
-
return self.conv5(torch.cat(features, dim=1))
|
97 |
-
|
98 |
-
|
99 |
-
#### -- ####
|
100 |
-
|
101 |
-
|
102 |
class Detection(nn.Module):
|
103 |
"""A single YOLO Detection head for detection models"""
|
104 |
|
@@ -139,8 +85,7 @@ class MultiheadDetection(nn.Module):
|
|
139 |
return [head(x) for x, head in zip(x_list, self.heads)]
|
140 |
|
141 |
|
142 |
-
|
143 |
-
# RepVGG
|
144 |
class RepConv(nn.Module):
|
145 |
"""A convolutional block that combines two convolution layers (kernel and point-wise)."""
|
146 |
|
@@ -172,7 +117,7 @@ class RepNBottleneck(nn.Module):
|
|
172 |
*,
|
173 |
kernel_size: Tuple[int, int] = (3, 3),
|
174 |
residual: bool = True,
|
175 |
-
expand: float =
|
176 |
**kwargs
|
177 |
):
|
178 |
super().__init__()
|
@@ -183,7 +128,9 @@ class RepNBottleneck(nn.Module):
|
|
183 |
|
184 |
if residual and (in_channels != out_channels):
|
185 |
self.residual = False
|
186 |
-
|
|
|
|
|
187 |
|
188 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
189 |
y = self.conv2(self.conv1(x))
|
@@ -201,28 +148,24 @@ class RepNCSP(nn.Module):
|
|
201 |
*,
|
202 |
csp_expand: float = 0.5,
|
203 |
repeat_num: int = 1,
|
204 |
-
|
205 |
**kwargs
|
206 |
):
|
207 |
super().__init__()
|
208 |
|
209 |
-
if bottleneck_args is None:
|
210 |
-
bottleneck_args = {"kernel_size": (3, 3), "residual": True, "expand": 0.5}
|
211 |
-
|
212 |
neck_channels = int(out_channels * csp_expand)
|
213 |
self.conv1 = Conv(in_channels, neck_channels, kernel_size, **kwargs)
|
214 |
self.conv2 = Conv(in_channels, neck_channels, kernel_size, **kwargs)
|
215 |
self.conv3 = Conv(2 * neck_channels, out_channels, kernel_size, **kwargs)
|
216 |
|
217 |
-
self.
|
218 |
-
*[RepNBottleneck(neck_channels, neck_channels, **
|
219 |
)
|
220 |
|
221 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
return self.conv3(torch.cat((bottleneck_output, split_features), dim=1))
|
226 |
|
227 |
|
228 |
class RepNCSPELAN(nn.Module):
|
@@ -230,48 +173,103 @@ class RepNCSPELAN(nn.Module):
|
|
230 |
|
231 |
def __init__(
|
232 |
self,
|
233 |
-
*,
|
234 |
in_channels: int,
|
235 |
out_channels: int,
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
**kwargs
|
242 |
):
|
243 |
super().__init__()
|
244 |
|
245 |
-
if
|
246 |
-
|
247 |
|
248 |
-
self.conv1 = Conv(in_channels,
|
249 |
self.conv2 = nn.Sequential(
|
250 |
-
RepNCSP(
|
251 |
-
partition_channels // 2,
|
252 |
-
process_channels,
|
253 |
-
csp_expand=expand,
|
254 |
-
bottleneck_args=bottleneck_args,
|
255 |
-
**repncsp_args
|
256 |
-
),
|
257 |
Conv(process_channels, process_channels, 3, padding=1, **kwargs),
|
258 |
)
|
259 |
self.conv3 = nn.Sequential(
|
260 |
-
RepNCSP(
|
261 |
-
process_channels, process_channels, csp_expand=expand, bottleneck_args=bottleneck_args, **repncsp_args
|
262 |
-
),
|
263 |
Conv(process_channels, process_channels, 3, padding=1, **kwargs),
|
264 |
)
|
265 |
-
self.conv4 = Conv(
|
266 |
|
267 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
return
|
|
|
273 |
|
|
|
|
|
274 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
275 |
# ResNet
|
276 |
class Res(nn.Module):
|
277 |
# ResNet bottleneck
|
@@ -567,15 +565,6 @@ class ImplicitM(nn.Module):
|
|
567 |
return self.implicit * x
|
568 |
|
569 |
|
570 |
-
class UpSample(nn.Module):
|
571 |
-
def __init__(self, **kwargs):
|
572 |
-
super().__init__()
|
573 |
-
self.UpSample = nn.Upsample(**kwargs)
|
574 |
-
|
575 |
-
def forward(self, x):
|
576 |
-
return self.UpSample(x)
|
577 |
-
|
578 |
-
|
579 |
class IDetect(nn.Module):
|
580 |
"""
|
581 |
#TODO: Add Detect class, change IDetect base class
|
|
|
1 |
from typing import Any, Dict, List, Optional, Tuple
|
2 |
|
3 |
import torch
|
4 |
+
from loguru import logger
|
5 |
from torch import Tensor, nn
|
6 |
from torch.nn.common_types import _size_2_t
|
7 |
|
8 |
from yolo.tools.module_helper import auto_pad, get_activation, round_up
|
9 |
|
10 |
|
11 |
+
# ----------- Basic Class ----------- #
|
12 |
class Conv(nn.Module):
|
13 |
"""A basic convolutional block that includes convolution, batch normalization, and activation."""
|
14 |
|
|
|
44 |
return self.pool(x)
|
45 |
|
46 |
|
47 |
+
# ----------- Detection Class ----------- #
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
class Detection(nn.Module):
|
49 |
"""A single YOLO Detection head for detection models"""
|
50 |
|
|
|
85 |
return [head(x) for x, head in zip(x_list, self.heads)]
|
86 |
|
87 |
|
88 |
+
# ----------- Backbone Class ----------- #
|
|
|
89 |
class RepConv(nn.Module):
|
90 |
"""A convolutional block that combines two convolution layers (kernel and point-wise)."""
|
91 |
|
|
|
117 |
*,
|
118 |
kernel_size: Tuple[int, int] = (3, 3),
|
119 |
residual: bool = True,
|
120 |
+
expand: float = 0.5,
|
121 |
**kwargs
|
122 |
):
|
123 |
super().__init__()
|
|
|
128 |
|
129 |
if residual and (in_channels != out_channels):
|
130 |
self.residual = False
|
131 |
+
logger.warning(
|
132 |
+
"Residual connection disabled: in_channels ({}) != out_channels ({})", in_channels, out_channels
|
133 |
+
)
|
134 |
|
135 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
|
136 |
y = self.conv2(self.conv1(x))
|
|
|
148 |
*,
|
149 |
csp_expand: float = 0.5,
|
150 |
repeat_num: int = 1,
|
151 |
+
neck_args: Dict[str, Any] = {},
|
152 |
**kwargs
|
153 |
):
|
154 |
super().__init__()
|
155 |
|
|
|
|
|
|
|
156 |
neck_channels = int(out_channels * csp_expand)
|
157 |
self.conv1 = Conv(in_channels, neck_channels, kernel_size, **kwargs)
|
158 |
self.conv2 = Conv(in_channels, neck_channels, kernel_size, **kwargs)
|
159 |
self.conv3 = Conv(2 * neck_channels, out_channels, kernel_size, **kwargs)
|
160 |
|
161 |
+
self.bottleneck = nn.Sequential(
|
162 |
+
*[RepNBottleneck(neck_channels, neck_channels, **neck_args) for _ in range(repeat_num)]
|
163 |
)
|
164 |
|
165 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Run both CSP paths over ``x`` and fuse them.

    One path goes through ``conv1`` followed by the bottleneck stack, the
    other through ``conv2`` only. The two results are concatenated along
    dim 1 and passed to ``conv3``.
    """
    bottleneck_path = self.conv1(x)
    bottleneck_path = self.bottleneck(bottleneck_path)
    shortcut_path = self.conv2(x)
    fused = torch.cat((bottleneck_path, shortcut_path), dim=1)
    return self.conv3(fused)
|
|
|
169 |
|
170 |
|
171 |
class RepNCSPELAN(nn.Module):
|
|
|
173 |
|
174 |
def __init__(
    self,
    in_channels: int,
    out_channels: int,
    part_channels: int,
    *,
    process_channels: Optional[int] = None,
    csp_args: Optional[Dict[str, Any]] = None,
    csp_neck_args: Optional[Dict[str, Any]] = None,
    **kwargs
):
    """Build the four convolution stages of the block.

    Args:
        in_channels: Channels of the input fed to ``conv1``.
        out_channels: Channels produced by the final ``conv4``.
        part_channels: Channels produced by ``conv1``; half of them
            (``part_channels // 2``) feed the first ``RepNCSP``.
        process_channels: Channels used by both ``RepNCSP`` stages.
            Defaults to ``part_channels // 2`` when ``None``.
        csp_args: Extra keyword arguments unpacked into each ``RepNCSP``.
            ``None`` means no extra arguments.
        csp_neck_args: Options handed to each ``RepNCSP`` as its
            ``neck_args`` argument. ``None`` means an empty dict.
        **kwargs: Forwarded unchanged to every ``Conv`` built here.
    """
    super().__init__()

    if process_channels is None:
        process_channels = part_channels // 2
    # Use None sentinels instead of shared mutable ``{}`` defaults.
    csp_args = {} if csp_args is None else csp_args
    csp_neck_args = {} if csp_neck_args is None else csp_neck_args

    self.conv1 = Conv(in_channels, part_channels, 1, **kwargs)
    # RepNCSP names this parameter ``neck_args``. Passing it as
    # ``csp_neck_args=`` would drop it into RepNCSP's **kwargs, so the
    # options would never reach the bottleneck stack.
    self.conv2 = nn.Sequential(
        RepNCSP(part_channels // 2, process_channels, neck_args=csp_neck_args, **csp_args),
        Conv(process_channels, process_channels, 3, padding=1, **kwargs),
    )
    self.conv3 = nn.Sequential(
        RepNCSP(process_channels, process_channels, neck_args=csp_neck_args, **csp_args),
        Conv(process_channels, process_channels, 3, padding=1, **kwargs),
    )
    # conv4 input width: conv1's part_channels plus one process_channels
    # output from each of conv2 and conv3.
    self.conv4 = Conv(part_channels + 2 * process_channels, out_channels, 1, **kwargs)
|
200 |
|
201 |
def forward(self, x: torch.Tensor) -> torch.Tensor:
    """Split, process in cascade, and fuse all intermediate feature maps.

    ``conv1`` output is cut into two chunks along dim 1. The second chunk
    goes through ``conv2``, and that result through ``conv3``. All four
    tensors are concatenated along dim 1 and passed to ``conv4``.
    """
    first_half, second_half = self.conv1(x).chunk(2, 1)
    branches = [first_half, second_half]
    # Each stage consumes the output of the previous one.
    for stage in (self.conv2, self.conv3):
        branches.append(stage(branches[-1]))
    return self.conv4(torch.cat(branches, dim=1))
|
207 |
+
|
208 |
|
209 |
+
class ADown(nn.Module):
    """Downsampling block with two parallel halves: a strided conv and a max-pool + 1x1 conv."""

    def __init__(self, in_channels: int, out_channels: int):
        """Create the pooling and convolution layers.

        Each branch takes ``in_channels // 2`` channels and produces
        ``out_channels // 2`` channels.
        """
        super().__init__()
        branch_in = in_channels // 2
        branch_out = out_channels // 2
        # Sub-modules are registered in the same order as before so that
        # parameter/state-dict ordering is unchanged.
        self.avg_pool = Pool("avg", kernel_size=2, stride=1)
        self.conv1 = Conv(branch_in, branch_out, kernel_size=3, stride=2)
        self.max_pool = Pool("max", kernel_size=3, stride=2)
        self.conv2 = Conv(branch_in, branch_out, kernel_size=1)

    def forward(self, x: Tensor) -> Tensor:
        """Average-pool ``x``, split it in two along dim 1, and merge both processed halves."""
        pooled = self.avg_pool(x)
        conv_half, pool_half = pooled.chunk(2, dim=1)
        conv_branch = self.conv1(conv_half)
        pool_branch = self.conv2(self.max_pool(pool_half))
        return torch.cat((conv_branch, pool_branch), dim=1)
|
229 |
+
|
230 |
+
|
231 |
+
class CBLinear(nn.Module):
    """Convolutional block that outputs multiple feature maps split along the channel dimension."""

    def __init__(self, in_channels: int, out_channels: List[int], kernel_size: int = 1, **kwargs):
        """Build one convolution wide enough for all requested outputs.

        Args:
            in_channels: Channels of the input tensor.
            out_channels: Channel count of each output chunk; the
                convolution produces ``sum(out_channels)`` channels.
            kernel_size: Kernel size of the convolution.
            **kwargs: Forwarded to ``nn.Conv2d``. ``padding`` defaults to
                ``auto_pad(kernel_size, **kwargs)`` when not supplied.
        """
        super(CBLinear, self).__init__()
        kwargs.setdefault("padding", auto_pad(kernel_size, **kwargs))
        # Keep a private copy so later mutation of the caller's sequence
        # cannot desynchronise the split sizes from the conv's width.
        self.out_channels = list(out_channels)
        self.conv = nn.Conv2d(in_channels, sum(self.out_channels), kernel_size, **kwargs)

    def forward(self, x: Tensor) -> Tuple[Tensor, ...]:
        """Convolve ``x`` and return one tensor per entry of ``out_channels`` (split on dim 1)."""
        x = self.conv(x)
        return x.split(self.out_channels, dim=1)
|
243 |
+
|
244 |
+
|
245 |
+
class SPPELAN(nn.Module):
    """SPPELAN module comprising multiple pooling and convolution layers."""

    def __init__(self, in_channels: int, out_channels: int, neck_channels: Optional[int] = None):
        """Create the entry conv, three chained max-pools, and the fusing conv.

        Args:
            in_channels: Channels of the input tensor.
            out_channels: Channels produced by the final convolution.
            neck_channels: Channels after the first convolution. Falls back
                to ``out_channels // 2`` when ``None`` (or 0).
        """
        super(SPPELAN, self).__init__()
        # The parameter used to be written ``neck_channels=Optional[int]``,
        # which made the typing object itself the default value. That object
        # is truthy, so this fallback never ran.
        neck_channels = neck_channels or out_channels // 2

        self.conv1 = Conv(in_channels, neck_channels, kernel_size=1)
        self.pools = nn.ModuleList([Pool("max", 5, stride=1) for _ in range(3)])
        # Four feature maps (conv1 output + three pooled maps) are concatenated.
        self.conv5 = Conv(4 * neck_channels, out_channels, kernel_size=1)

    def forward(self, x: Tensor) -> Tensor:
        """Apply ``conv1``, pool its output three times in sequence, and fuse all four maps.

        The concatenation along dim 1 requires every pooled map to keep the
        same non-channel dimensions as the ``conv1`` output.
        """
        features = [self.conv1(x)]
        for pool in self.pools:
            # Each pool consumes the previous pool's output.
            features.append(pool(features[-1]))
        return self.conv5(torch.cat(features, dim=1))
|
261 |
+
|
262 |
+
|
263 |
+
class UpSample(nn.Module):
    """Thin module wrapper around ``torch.nn.Upsample``."""

    def __init__(self, **kwargs):
        """Create the wrapped ``nn.Upsample``; all keyword arguments are passed through to it."""
        super().__init__()
        upsampler = nn.Upsample(**kwargs)
        # The attribute name is kept as-is: it is the registered sub-module name.
        self.UpSample = upsampler

    def forward(self, x):
        """Return ``x`` resized by the wrapped upsampling layer."""
        resized = self.UpSample(x)
        return resized
|
270 |
+
|
271 |
+
|
272 |
+
############# Waiting For Refactor #############
|
273 |
# ResNet
|
274 |
class Res(nn.Module):
|
275 |
# ResNet bottleneck
|
|
|
565 |
return self.implicit * x
|
566 |
|
567 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
568 |
class IDetect(nn.Module):
|
569 |
"""
|
570 |
#TODO: Add Detect class, change IDetect base class
|