New TensorFlow `TFCrossConv()` module (#7827)
Browse files
* New TensorFlow `TFCrossConv()` module
* Move from experimental to common
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
* Add C3x
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
* Add C3x to yolo.py
* Add C3x to tf.py
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
* TFC3x bug fix
* TFC3x bug fix
* TFC3x bug fix
* Add TFDWConv g==c1==c2 check
* Add comment
* Update tf.py
* [pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- models/common.py +23 -2
- models/experimental.py +0 -14
- models/tf.py +39 -10
- models/yolo.py +2 -2
models/common.py
CHANGED
@@ -31,7 +31,7 @@ from utils.torch_utils import copy_attr, time_sync
|
|
31 |
def autopad(k, p=None): # kernel, padding
|
32 |
# Pad to 'same'
|
33 |
if p is None:
|
34 |
-
p = k // 2 if isinstance(k, int) else
|
35 |
return p
|
36 |
|
37 |
|
@@ -124,6 +124,20 @@ class BottleneckCSP(nn.Module):
|
|
124 |
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
|
125 |
|
126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
class C3(nn.Module):
|
128 |
# CSP Bottleneck with 3 convolutions
|
129 |
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
@@ -133,12 +147,19 @@ class C3(nn.Module):
|
|
133 |
self.cv2 = Conv(c1, c_, 1, 1)
|
134 |
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
|
135 |
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
136 |
-
# self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
|
137 |
|
138 |
def forward(self, x):
|
139 |
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
|
140 |
|
141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
class C3TR(C3):
|
143 |
# C3 module with TransformerBlock()
|
144 |
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
|
|
31 |
def autopad(k, p=None): # kernel, padding
|
32 |
# Pad to 'same'
|
33 |
if p is None:
|
34 |
+
p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
|
35 |
return p
|
36 |
|
37 |
|
|
|
124 |
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
|
125 |
|
126 |
|
127 |
+
class CrossConv(nn.Module):
|
128 |
+
# Cross Convolution Downsample
|
129 |
+
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
|
130 |
+
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
|
131 |
+
super().__init__()
|
132 |
+
c_ = int(c2 * e) # hidden channels
|
133 |
+
self.cv1 = Conv(c1, c_, (1, k), (1, s))
|
134 |
+
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
|
135 |
+
self.add = shortcut and c1 == c2
|
136 |
+
|
137 |
+
def forward(self, x):
|
138 |
+
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
139 |
+
|
140 |
+
|
141 |
class C3(nn.Module):
|
142 |
# CSP Bottleneck with 3 convolutions
|
143 |
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
|
|
|
147 |
self.cv2 = Conv(c1, c_, 1, 1)
|
148 |
self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
|
149 |
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
|
|
150 |
|
151 |
def forward(self, x):
|
152 |
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
|
153 |
|
154 |
|
155 |
+
class C3x(C3):
|
156 |
+
# C3 module with cross-convolutions
|
157 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
158 |
+
super().__init__(c1, c2, n, shortcut, g, e)
|
159 |
+
c_ = int(c2 * e)
|
160 |
+
self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
|
161 |
+
|
162 |
+
|
163 |
class C3TR(C3):
|
164 |
# C3 module with TransformerBlock()
|
165 |
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
models/experimental.py
CHANGED
@@ -12,20 +12,6 @@ from models.common import Conv
|
|
12 |
from utils.downloads import attempt_download
|
13 |
|
14 |
|
15 |
-
class CrossConv(nn.Module):
|
16 |
-
# Cross Convolution Downsample
|
17 |
-
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
|
18 |
-
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
|
19 |
-
super().__init__()
|
20 |
-
c_ = int(c2 * e) # hidden channels
|
21 |
-
self.cv1 = Conv(c1, c_, (1, k), (1, s))
|
22 |
-
self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
|
23 |
-
self.add = shortcut and c1 == c2
|
24 |
-
|
25 |
-
def forward(self, x):
|
26 |
-
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
27 |
-
|
28 |
-
|
29 |
class Sum(nn.Module):
|
30 |
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
31 |
def __init__(self, n, weight=False): # n: number of inputs
|
|
|
12 |
from utils.downloads import attempt_download
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
class Sum(nn.Module):
|
16 |
# Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
|
17 |
def __init__(self, n, weight=False): # n: number of inputs
|
models/tf.py
CHANGED
@@ -27,8 +27,8 @@ import torch
|
|
27 |
import torch.nn as nn
|
28 |
from tensorflow import keras
|
29 |
|
30 |
-
from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
|
31 |
-
from models.experimental import CrossConv, MixConv2d, attempt_load
|
32 |
from models.yolo import Detect
|
33 |
from utils.activations import SiLU
|
34 |
from utils.general import LOGGER, make_divisible, print_args
|
@@ -50,10 +50,13 @@ class TFBN(keras.layers.Layer):
|
|
50 |
|
51 |
|
52 |
class TFPad(keras.layers.Layer):
|
53 |
-
|
54 |
def __init__(self, pad):
|
55 |
super().__init__()
|
56 |
-
|
|
|
|
|
|
|
57 |
|
58 |
def call(self, inputs):
|
59 |
return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
|
@@ -65,10 +68,8 @@ class TFConv(keras.layers.Layer):
|
|
65 |
# ch_in, ch_out, weights, kernel, stride, padding, groups
|
66 |
super().__init__()
|
67 |
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
|
68 |
-
assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
|
69 |
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
|
70 |
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
|
71 |
-
|
72 |
conv = keras.layers.Conv2D(
|
73 |
filters=c2,
|
74 |
kernel_size=k,
|
@@ -90,8 +91,7 @@ class TFDWConv(keras.layers.Layer):
|
|
90 |
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
|
91 |
# ch_in, ch_out, weights, kernel, stride, padding, groups
|
92 |
super().__init__()
|
93 |
-
assert
|
94 |
-
|
95 |
conv = keras.layers.DepthwiseConv2D(
|
96 |
kernel_size=k,
|
97 |
strides=s,
|
@@ -133,6 +133,19 @@ class TFBottleneck(keras.layers.Layer):
|
|
133 |
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
|
134 |
|
135 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
class TFConv2d(keras.layers.Layer):
|
137 |
# Substitution for PyTorch nn.Conv2D
|
138 |
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
|
@@ -187,6 +200,22 @@ class TFC3(keras.layers.Layer):
|
|
187 |
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
|
188 |
|
189 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
class TFSPP(keras.layers.Layer):
|
191 |
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
192 |
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
|
@@ -310,12 +339,12 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
|
|
310 |
pass
|
311 |
|
312 |
n = max(round(n * gd), 1) if n > 1 else n # depth gain
|
313 |
-
if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
|
314 |
c1, c2 = ch[f], args[0]
|
315 |
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
|
316 |
|
317 |
args = [c1, c2, *args[1:]]
|
318 |
-
if m in [BottleneckCSP, C3]:
|
319 |
args.insert(2, n)
|
320 |
n = 1
|
321 |
elif m is nn.BatchNorm2d:
|
|
|
27 |
import torch.nn as nn
|
28 |
from tensorflow import keras
|
29 |
|
30 |
+
from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, Focus, autopad
|
31 |
+
from models.experimental import MixConv2d, attempt_load
|
32 |
from models.yolo import Detect
|
33 |
from utils.activations import SiLU
|
34 |
from utils.general import LOGGER, make_divisible, print_args
|
|
|
50 |
|
51 |
|
52 |
class TFPad(keras.layers.Layer):
|
53 |
+
# Pad inputs in spatial dimensions 1 and 2
|
54 |
def __init__(self, pad):
|
55 |
super().__init__()
|
56 |
+
if isinstance(pad, int):
|
57 |
+
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
|
58 |
+
else: # tuple/list
|
59 |
+
self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
|
60 |
|
61 |
def call(self, inputs):
|
62 |
return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
|
|
|
68 |
# ch_in, ch_out, weights, kernel, stride, padding, groups
|
69 |
super().__init__()
|
70 |
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
|
|
|
71 |
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
|
72 |
# see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
|
|
|
73 |
conv = keras.layers.Conv2D(
|
74 |
filters=c2,
|
75 |
kernel_size=k,
|
|
|
91 |
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
|
92 |
# ch_in, ch_out, weights, kernel, stride, padding, groups
|
93 |
super().__init__()
|
94 |
+
assert g == c1 == c2, f'TFDWConv() groups={g} must equal input={c1} and output={c2} channels'
|
|
|
95 |
conv = keras.layers.DepthwiseConv2D(
|
96 |
kernel_size=k,
|
97 |
strides=s,
|
|
|
133 |
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
|
134 |
|
135 |
|
136 |
+
class TFCrossConv(keras.layers.Layer):
|
137 |
+
# Cross Convolution
|
138 |
+
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
|
139 |
+
super().__init__()
|
140 |
+
c_ = int(c2 * e) # hidden channels
|
141 |
+
self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
|
142 |
+
self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
|
143 |
+
self.add = shortcut and c1 == c2
|
144 |
+
|
145 |
+
def call(self, inputs):
|
146 |
+
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
|
147 |
+
|
148 |
+
|
149 |
class TFConv2d(keras.layers.Layer):
|
150 |
# Substitution for PyTorch nn.Conv2D
|
151 |
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
|
|
|
200 |
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
|
201 |
|
202 |
|
203 |
+
class TFC3x(keras.layers.Layer):
|
204 |
+
# C3 module with cross-convolutions
|
205 |
+
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
|
206 |
+
# ch_in, ch_out, number, shortcut, groups, expansion
|
207 |
+
super().__init__()
|
208 |
+
c_ = int(c2 * e) # hidden channels
|
209 |
+
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
|
210 |
+
self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
|
211 |
+
self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
|
212 |
+
self.m = keras.Sequential([
|
213 |
+
TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
|
214 |
+
|
215 |
+
def call(self, inputs):
|
216 |
+
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
|
217 |
+
|
218 |
+
|
219 |
class TFSPP(keras.layers.Layer):
|
220 |
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
221 |
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
|
|
|
339 |
pass
|
340 |
|
341 |
n = max(round(n * gd), 1) if n > 1 else n # depth gain
|
342 |
+
if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3, C3x]:
|
343 |
c1, c2 = ch[f], args[0]
|
344 |
c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
|
345 |
|
346 |
args = [c1, c2, *args[1:]]
|
347 |
+
if m in [BottleneckCSP, C3, C3x]:
|
348 |
args.insert(2, n)
|
349 |
n = 1
|
350 |
elif m is nn.BatchNorm2d:
|
models/yolo.py
CHANGED
@@ -266,13 +266,13 @@ def parse_model(d, ch): # model_dict, input_channels(3)
|
|
266 |
|
267 |
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
|
268 |
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
|
269 |
-
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost):
|
270 |
c1, c2 = ch[f], args[0]
|
271 |
if c2 != no: # if not output
|
272 |
c2 = make_divisible(c2 * gw, 8)
|
273 |
|
274 |
args = [c1, c2, *args[1:]]
|
275 |
-
if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
|
276 |
args.insert(2, n) # number of repeats
|
277 |
n = 1
|
278 |
elif m is nn.BatchNorm2d:
|
|
|
266 |
|
267 |
n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
|
268 |
if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
|
269 |
+
BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, C3x):
|
270 |
c1, c2 = ch[f], args[0]
|
271 |
if c2 != no: # if not output
|
272 |
c2 = make_divisible(c2 * gw, 8)
|
273 |
|
274 |
args = [c1, c2, *args[1:]]
|
275 |
+
if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]:
|
276 |
args.insert(2, n) # number of repeats
|
277 |
n = 1
|
278 |
elif m is nn.BatchNorm2d:
|