glenn-jocher pre-commit-ci[bot] committed on
Commit
fb7fa5b
·
unverified ·
1 Parent(s): d29df68

New TensorFlow `TFCrossConv()` module (#7827)

Browse files

* New TensorFlow `TFCrossConv()` module

* Move from experimental to common

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add C3x

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Add C3x to yolo.py

* Add C3x to tf.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* TFC3x bug fix

* TFC3x bug fix

* TFC3x bug fix

* Add TFDWConv g==c1==c2 check

* Add comment

* Update tf.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>

Files changed (4) hide show
  1. models/common.py +23 -2
  2. models/experimental.py +0 -14
  3. models/tf.py +39 -10
  4. models/yolo.py +2 -2
models/common.py CHANGED
@@ -31,7 +31,7 @@ from utils.torch_utils import copy_attr, time_sync
31
  def autopad(k, p=None): # kernel, padding
32
  # Pad to 'same'
33
  if p is None:
34
- p = k // 2 if isinstance(k, int) else (x // 2 for x in k) # auto-pad
35
  return p
36
 
37
 
@@ -124,6 +124,20 @@ class BottleneckCSP(nn.Module):
124
  return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
125
 
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  class C3(nn.Module):
128
  # CSP Bottleneck with 3 convolutions
129
  def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
@@ -133,12 +147,19 @@ class C3(nn.Module):
133
  self.cv2 = Conv(c1, c_, 1, 1)
134
  self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
135
  self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
136
- # self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
137
 
138
  def forward(self, x):
139
  return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
140
 
141
 
 
 
 
 
 
 
 
 
142
  class C3TR(C3):
143
  # C3 module with TransformerBlock()
144
  def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
 
31
  def autopad(k, p=None): # kernel, padding
32
  # Pad to 'same'
33
  if p is None:
34
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
35
  return p
36
 
37
 
 
124
  return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
125
 
126
 
127
+ class CrossConv(nn.Module):
128
+ # Cross Convolution Downsample
129
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
130
+ # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
131
+ super().__init__()
132
+ c_ = int(c2 * e) # hidden channels
133
+ self.cv1 = Conv(c1, c_, (1, k), (1, s))
134
+ self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
135
+ self.add = shortcut and c1 == c2
136
+
137
+ def forward(self, x):
138
+ return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
139
+
140
+
141
  class C3(nn.Module):
142
  # CSP Bottleneck with 3 convolutions
143
  def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion
 
147
  self.cv2 = Conv(c1, c_, 1, 1)
148
  self.cv3 = Conv(2 * c_, c2, 1) # optional act=FReLU(c2)
149
  self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
 
150
 
151
  def forward(self, x):
152
  return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
153
 
154
 
155
+ class C3x(C3):
156
+ # C3 module with cross-convolutions
157
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
158
+ super().__init__(c1, c2, n, shortcut, g, e)
159
+ c_ = int(c2 * e)
160
+ self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
161
+
162
+
163
  class C3TR(C3):
164
  # C3 module with TransformerBlock()
165
  def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
models/experimental.py CHANGED
@@ -12,20 +12,6 @@ from models.common import Conv
12
  from utils.downloads import attempt_download
13
 
14
 
15
- class CrossConv(nn.Module):
16
- # Cross Convolution Downsample
17
- def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
18
- # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
19
- super().__init__()
20
- c_ = int(c2 * e) # hidden channels
21
- self.cv1 = Conv(c1, c_, (1, k), (1, s))
22
- self.cv2 = Conv(c_, c2, (k, 1), (s, 1), g=g)
23
- self.add = shortcut and c1 == c2
24
-
25
- def forward(self, x):
26
- return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
27
-
28
-
29
  class Sum(nn.Module):
30
  # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
31
  def __init__(self, n, weight=False): # n: number of inputs
 
12
  from utils.downloads import attempt_download
13
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  class Sum(nn.Module):
16
  # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
17
  def __init__(self, n, weight=False): # n: number of inputs
models/tf.py CHANGED
@@ -27,8 +27,8 @@ import torch
27
  import torch.nn as nn
28
  from tensorflow import keras
29
 
30
- from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
31
- from models.experimental import CrossConv, MixConv2d, attempt_load
32
  from models.yolo import Detect
33
  from utils.activations import SiLU
34
  from utils.general import LOGGER, make_divisible, print_args
@@ -50,10 +50,13 @@ class TFBN(keras.layers.Layer):
50
 
51
 
52
  class TFPad(keras.layers.Layer):
53
-
54
  def __init__(self, pad):
55
  super().__init__()
56
- self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
 
 
 
57
 
58
  def call(self, inputs):
59
  return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
@@ -65,10 +68,8 @@ class TFConv(keras.layers.Layer):
65
  # ch_in, ch_out, weights, kernel, stride, padding, groups
66
  super().__init__()
67
  assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
68
- assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
69
  # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
70
  # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
71
-
72
  conv = keras.layers.Conv2D(
73
  filters=c2,
74
  kernel_size=k,
@@ -90,8 +91,7 @@ class TFDWConv(keras.layers.Layer):
90
  def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
91
  # ch_in, ch_out, kernel, stride, padding, groups, activation, weights
92
  super().__init__()
93
- assert isinstance(k, int), "Convolution with multiple kernels are not allowed."
94
-
95
  conv = keras.layers.DepthwiseConv2D(
96
  kernel_size=k,
97
  strides=s,
@@ -133,6 +133,19 @@ class TFBottleneck(keras.layers.Layer):
133
  return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
134
 
135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  class TFConv2d(keras.layers.Layer):
137
  # Substitution for PyTorch nn.Conv2D
138
  def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
@@ -187,6 +200,22 @@ class TFC3(keras.layers.Layer):
187
  return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
188
 
189
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  class TFSPP(keras.layers.Layer):
191
  # Spatial pyramid pooling layer used in YOLOv3-SPP
192
  def __init__(self, c1, c2, k=(5, 9, 13), w=None):
@@ -310,12 +339,12 @@ def parse_model(d, ch, model, imgsz): # model_dict, input_channels(3)
310
  pass
311
 
312
  n = max(round(n * gd), 1) if n > 1 else n # depth gain
313
- if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
314
  c1, c2 = ch[f], args[0]
315
  c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
316
 
317
  args = [c1, c2, *args[1:]]
318
- if m in [BottleneckCSP, C3]:
319
  args.insert(2, n)
320
  n = 1
321
  elif m is nn.BatchNorm2d:
 
27
  import torch.nn as nn
28
  from tensorflow import keras
29
 
30
+ from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, C3x, Concat, Conv, CrossConv, DWConv, Focus, autopad
31
+ from models.experimental import MixConv2d, attempt_load
32
  from models.yolo import Detect
33
  from utils.activations import SiLU
34
  from utils.general import LOGGER, make_divisible, print_args
 
50
 
51
 
52
  class TFPad(keras.layers.Layer):
53
+ # Pad inputs in spatial dimensions 1 and 2
54
  def __init__(self, pad):
55
  super().__init__()
56
+ if isinstance(pad, int):
57
+ self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
58
+ else: # tuple/list
59
+ self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
60
 
61
  def call(self, inputs):
62
  return tf.pad(inputs, self.pad, mode='constant', constant_values=0)
 
68
  # ch_in, ch_out, weights, kernel, stride, padding, groups
69
  super().__init__()
70
  assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
 
71
  # TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
72
  # see https://stackoverflow.com/questions/52975843/comparing-conv2d-with-padding-between-tensorflow-and-pytorch
 
73
  conv = keras.layers.Conv2D(
74
  filters=c2,
75
  kernel_size=k,
 
91
  def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
92
  # ch_in, ch_out, kernel, stride, padding, groups, activation, weights
93
  super().__init__()
94
+ assert g == c1 == c2, f'TFDWConv() groups={g} must equal input={c1} and output={c2} channels'
 
95
  conv = keras.layers.DepthwiseConv2D(
96
  kernel_size=k,
97
  strides=s,
 
133
  return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
134
 
135
 
136
+ class TFCrossConv(keras.layers.Layer):
137
+ # Cross Convolution
138
+ def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
139
+ super().__init__()
140
+ c_ = int(c2 * e) # hidden channels
141
+ self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
142
+ self.cv2 = TFConv(c_, c2, (k, 1), (s, 1), g=g, w=w.cv2)
143
+ self.add = shortcut and c1 == c2
144
+
145
+ def call(self, inputs):
146
+ return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
147
+
148
+
149
  class TFConv2d(keras.layers.Layer):
150
  # Substitution for PyTorch nn.Conv2D
151
  def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
 
200
  return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
201
 
202
 
203
+ class TFC3x(keras.layers.Layer):
204
+ # C3 module with cross-convolutions
205
+ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
206
+ # ch_in, ch_out, number, shortcut, groups, expansion
207
+ super().__init__()
208
+ c_ = int(c2 * e) # hidden channels
209
+ self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
210
+ self.cv2 = TFConv(c1, c_, 1, 1, w=w.cv2)
211
+ self.cv3 = TFConv(2 * c_, c2, 1, 1, w=w.cv3)
212
+ self.m = keras.Sequential([
213
+ TFCrossConv(c_, c_, k=3, s=1, g=g, e=1.0, shortcut=shortcut, w=w.m[j]) for j in range(n)])
214
+
215
+ def call(self, inputs):
216
+ return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
217
+
218
+
219
  class TFSPP(keras.layers.Layer):
220
  # Spatial pyramid pooling layer used in YOLOv3-SPP
221
  def __init__(self, c1, c2, k=(5, 9, 13), w=None):
 
339
  pass
340
 
341
  n = max(round(n * gd), 1) if n > 1 else n # depth gain
342
+ if m in [nn.Conv2d, Conv, Bottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3, C3x]:
343
  c1, c2 = ch[f], args[0]
344
  c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
345
 
346
  args = [c1, c2, *args[1:]]
347
+ if m in [BottleneckCSP, C3, C3x]:
348
  args.insert(2, n)
349
  n = 1
350
  elif m is nn.BatchNorm2d:
models/yolo.py CHANGED
@@ -266,13 +266,13 @@ def parse_model(d, ch): # model_dict, input_channels(3)
266
 
267
  n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
268
  if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
269
- BottleneckCSP, C3, C3TR, C3SPP, C3Ghost):
270
  c1, c2 = ch[f], args[0]
271
  if c2 != no: # if not output
272
  c2 = make_divisible(c2 * gw, 8)
273
 
274
  args = [c1, c2, *args[1:]]
275
- if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
276
  args.insert(2, n) # number of repeats
277
  n = 1
278
  elif m is nn.BatchNorm2d:
 
266
 
267
  n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain
268
  if m in (Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
269
+ BottleneckCSP, C3, C3TR, C3SPP, C3Ghost, C3x):
270
  c1, c2 = ch[f], args[0]
271
  if c2 != no: # if not output
272
  c2 = make_divisible(c2 * gw, 8)
273
 
274
  args = [c1, c2, *args[1:]]
275
+ if m in [BottleneckCSP, C3, C3TR, C3Ghost, C3x]:
276
  args.insert(2, n) # number of repeats
277
  n = 1
278
  elif m is nn.BatchNorm2d: