henry000 committed
Commit 787b81d · 2 Parent(s): 5fed6e3 183312f

🔀 [Merge] branch 'MODEL' into SETUP

Files changed (4):
  1. config/model/v7-base.yaml +26 -22
  2. model/module.py +210 -48
  3. model/yolo.py +91 -7
  4. train.py +3 -0
config/model/v7-base.yaml CHANGED
@@ -1,5 +1,5 @@
-anchor:
-  [1, 2, 3]
+nc: 80
+
 model:
   backbone:
     - Conv:
@@ -27,8 +27,8 @@ model:
         source: [-1, -3, -5, -6]
     - Conv:
         args: {out_channels: 256, kernel_size: 1}
-    - MP:
-        args: []
+    - MaxPool:
+        args: {}
     - Conv:
         args: {out_channels: 128, kernel_size: 1}
     - Conv:
@@ -56,8 +56,8 @@ model:
         tags: 8x
     - Conv:
         args: {out_channels: 512, kernel_size: 1}
-    - MP:
-        args: []
+    - MaxPool:
+        args: {}
     - Conv:
         args: {out_channels: 256, kernel_size: 1}
     - Conv:
@@ -85,8 +85,8 @@ model:
     - Conv:
         args: {out_channels: 1024, kernel_size: 1}
         tags: 16x
-    - MP:
-        args: []
+    - MaxPool:
+        args: {}
     - Conv:
         args: {out_channels: 512, kernel_size: 1}
     - Conv:
@@ -115,12 +115,12 @@ model:
         args: {out_channels: 1024, kernel_size: 1}
         tags: 32x
   head:
-    - SPPCSPC:
-        args: [512]
+    - SPPCSPConv:
+        args: {out_channels: 512}
     - Conv:
         args: {out_channels: 256, kernel_size: 1}
-    - Upsample:
-        args: [None, 2, nearest]
+    - UpSample:
+        args: {scale_factor: 2}
     - Conv:
         args: {out_channels: 256, kernel_size: 1}
         source: 16x
@@ -145,8 +145,8 @@ model:
         args: {out_channels: 256, kernel_size: 1}
     - Conv:
         args: {out_channels: 128, kernel_size: 1}
-    - Upsample:
-        args: [None, 2, nearest]
+    - UpSample:
+        args: {scale_factor: 2}
     - Conv:
         args: {out_channels: 128, kernel_size: 1}
         source: 8x
@@ -169,8 +169,8 @@ model:
         source: [-1, -2, -3, -4, -5, -6]
     - Conv:
         args: {out_channels: 128, kernel_size: 1}
-    - MP:
-        args: []
+    - MaxPool:
+        args: {}
     - Conv:
         args: {out_channels: 128, kernel_size: 1}
     - Conv:
@@ -197,8 +197,8 @@ model:
         source: [-1, -2, -3, -4, -5, -6]
     - Conv:
         args: {out_channels: 256, kernel_size: 1}
-    - MP:
-        args: []
+    - MaxPool:
+        args: {}
     - Conv:
         args: {out_channels: 256, kernel_size: 1}
     - Conv:
@@ -226,14 +226,18 @@ model:
     - Conv:
         args: {out_channels: 512, kernel_size: 1}
     - RepConv:
-        args: [256, 3, 1]
+        args: {out_channels: 256}
         source: 75
     - RepConv:
-        args: [512, 3, 1]
+        args: {out_channels: 512}
         source: 88
     - RepConv:
-        args: [1024, 3, 1]
+        args: {out_channels: 1024}
         source: 101
     - IDetect:
-        args: [nc, anchors]
+        args:
+          anchors:
+            - [12,16, 19,36, 40,28]  # P3/8
+            - [36,75, 76,55, 72,146]  # P4/16
+            - [142,110, 192,243, 459,401]  # P5/32
         source: [102, 103, 104]
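
Note: the switch from positional args lists (e.g. [256, 3, 1]) to keyword dicts is what lets the builder inject inferred parameters such as in_channels before instantiating a layer, and the placeholder top-level anchor: list gives way to nc: 80 plus real anchors under IDetect. A minimal sketch of how one config entry is interpreted (hypothetical values; the access pattern mirrors YOLO.build_model in model/yolo.py below):

# One backbone entry from v7-base.yaml after YAML parsing (hypothetical values):
layer_spec = {"Conv": {"args": {"out_channels": 256, "kernel_size": 1}, "source": -1}}

layer_type, layer_info = next(iter(layer_spec.items()))
layer_args = dict(layer_info.get("args", {}))
layer_args["in_channels"] = 128  # injected by the builder from the previous layer's output dim
print(layer_type, layer_args)
# Conv {'out_channels': 256, 'kernel_size': 1, 'in_channels': 128}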
model/module.py CHANGED
@@ -1,14 +1,25 @@
 import torch
 import torch.nn as nn
 
-# basic
 
+# basic
 class Conv(nn.Module):
     # basic convolution
-    def __init__(self, in_channels, out_channels, kernel_size,
-                 stride=1, padding=0, dilation=1, groups=1, act=nn.ReLU(),
-                 bias=False, auto_padding=True, padding_mode='zeros'):
-
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        dilation=1,
+        groups=1,
+        act=nn.ReLU(),
+        bias=False,
+        auto_padding=True,
+        padding_mode="zeros",
+    ):
+
         super().__init__()
 
         # not yet handle the case when dilation is a tuple
@@ -18,7 +29,9 @@ class Conv(nn.Module):
         else:
             padding = [(dilation * (k - 1) + 1) // 2 for k in kernel_size]
 
-        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, dilation=dilation, bias=bias)
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, kernel_size, stride, padding, groups=groups, dilation=dilation, bias=bias
+        )
         self.bn = nn.BatchNorm2d(out_channels)
         self.act = act if isinstance(act, nn.Module) else nn.Identity()
 
@@ -33,11 +46,9 @@ class Conv(nn.Module):
 
 
 # RepVGG
-
 class RepConv(nn.Module):
     # https://github.com/DingXiaoH/RepVGG
-    def __init__(self, in_channels, out_channels, kernel_size=3,
-                 stride=1, groups=1, act=nn.ReLU()):
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, groups=1, act=nn.ReLU()):
 
         super().__init__()
 
@@ -56,11 +67,9 @@ class RepConv(nn.Module):
 
 
 # ResNet
-
 class Res(nn.Module):
     # ResNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.25):
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):
 
         super().__init__()
 
@@ -75,8 +84,7 @@ class Res(nn.Module):
 
 class RepRes(nn.Module):
     # RepResNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.25):
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):
 
         super().__init__()
 
@@ -91,14 +99,21 @@ class RepRes(nn.Module):
 
 class ConvBlock(nn.Module):
     # ConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
 
         super().__init__()
 
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else Conv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else Conv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
 
     def forward(self, x):
@@ -107,14 +122,21 @@ class ConvBlock(nn.Module):
 
 class RepConvBlock(nn.Module):
     # ConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
 
         super().__init__()
 
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else RepConv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else RepConv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
 
     def forward(self, x):
@@ -123,14 +145,21 @@ class RepConvBlock(nn.Module):
 
 class ResConvBlock(nn.Module):
     # ResConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
 
         super().__init__()
 
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else Conv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else Conv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
 
     def forward(self, x):
@@ -139,14 +168,21 @@ class ResConvBlock(nn.Module):
 
 class ResRepConvBlock(nn.Module):
     # ResConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
 
         super().__init__()
 
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else RepConv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else RepConv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
 
     def forward(self, x):
@@ -154,11 +190,9 @@ class ResRepConvBlock(nn.Module):
 
 
 # Darknet
-
 class Dark(nn.Module):
     # DarkNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.5):
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):
 
         super().__init__()
 
@@ -172,8 +206,7 @@ class Dark(nn.Module):
 
 class RepDark(nn.Module):
     # RepDarkNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.5):
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):
 
         super().__init__()
 
@@ -186,11 +219,9 @@ class RepDark(nn.Module):
 
 
 # CSPNet
-
 class CSP(nn.Module):
     # CSPNet
-    def __init__(self, in_channels, out_channels,
-                 repeat=1, cb_repeat=2, act=nn.ReLU(), ratio=1.0):
+    def __init__(self, in_channels, out_channels, repeat=1, cb_repeat=2, act=nn.ReLU(), ratio=1.0):
 
         super().__init__()
 
@@ -208,14 +239,15 @@ class CSP(nn.Module):
 
 class CSPDark(nn.Module):
     # CSPNet
-    def __init__(self, in_channels, out_channels,
-                 repeat=1, groups=1, act=nn.ReLU(), ratio=1.0):
+    def __init__(self, in_channels, out_channels, repeat=1, groups=1, act=nn.ReLU(), ratio=1.0):
 
         super().__init__()
 
         h_channels = in_channels // 2
         self.cv1 = Conv(in_channels, in_channels, 1, 1, act=act)
-        self.cb = nn.Sequential(*(Dark(h_channels, h_channels, groups=groups, act=act, ratio=ratio) for _ in range(repeat)))
+        self.cb = nn.Sequential(
+            *(Dark(h_channels, h_channels, groups=groups, act=act, ratio=ratio) for _ in range(repeat))
+        )
         self.cv2 = Conv(2 * h_channels, out_channels, 1, 1, act=act)
 
     def forward(self, x):
@@ -226,18 +258,16 @@ class CSPDark(nn.Module):
 
 
 # ELAN
-
 class ELAN(nn.Module):
     # ELAN
-    def __init__(self, in_channels, out_channels, med_channels,
-                 elan_repeat=2, cb_repeat=2, ratio=1.0):
+    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):
 
         super().__init__()
 
         h_channels = med_channels // 2
         self.cv1 = Conv(in_channels, med_channels, 1, 1)
         self.cb = nn.ModuleList(ConvBlock(h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
-        self.cv2 = Conv((2+elan_repeat) * h_channels, out_channels, 1, 1)
+        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)
 
     def forward(self, x):
 
@@ -249,15 +279,14 @@ class ELAN(nn.Module):
 
 class CSPELAN(nn.Module):
     # ELAN
-    def __init__(self, in_channels, out_channels, med_channels,
-                 elan_repeat=2, cb_repeat=2, ratio=1.0):
+    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):
 
         super().__init__()
 
         h_channels = med_channels // 2
         self.cv1 = Conv(in_channels, med_channels, 1, 1)
         self.cb = nn.ModuleList(CSP(h_channels, h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
-        self.cv2 = Conv((2+elan_repeat) * h_channels, out_channels, 1, 1)
+        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)
 
     def forward(self, x):
 
@@ -265,3 +294,136 @@ class CSPELAN(nn.Module):
         y.extend((m(y[-1])) for m in self.cb)
 
         return self.cv2(torch.cat(y, 1))
+
+
+class Concat(nn.Module):
+    def __init__(self, dim=1):
+        super(Concat, self).__init__()
+        self.dim = dim
+
+    def forward(self, x):
+        return torch.cat(x, self.dim)
+
+
+class MaxPool(nn.Module):
+    def __init__(self, kernel_size: int = 2):
+        super().__init__()
+        self.pool_layer = nn.MaxPool2d(kernel_size=kernel_size, stride=kernel_size)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.pool_layer(x)
+
+
+# TODO: check if Mit
+class SPPCSPConv(nn.Module):
+    # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, in_channels, out_channels, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
+        super(SPPCSPConv, self).__init__()
+        c_ = int(2 * out_channels * e)  # hidden channels
+        self.cv1 = Conv(in_channels, c_, 1, 1)
+        self.cv2 = Conv(in_channels, c_, 1, 1)
+        self.cv3 = Conv(c_, c_, 3, 1)
+        self.cv4 = Conv(c_, c_, 1, 1)
+        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+        self.cv5 = Conv(4 * c_, c_, 1, 1)
+        self.cv6 = Conv(c_, c_, 3, 1)
+        self.cv7 = Conv(2 * c_, out_channels, 1, 1)
+
+    def forward(self, x):
+        x1 = self.cv4(self.cv3(self.cv1(x)))
+        y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))
+        y2 = self.cv2(x)
+        return self.cv7(torch.cat((y1, y2), dim=1))
+
+
+class ImplicitA(nn.Module):
+    """
+    Implement YOLOR - implicit knowledge (Add), paper: https://arxiv.org/abs/2105.04206
+    """
+
+    def __init__(self, channel: int, mean: float = 0.0, std: float = 0.02):
+        super().__init__()
+        self.channel = channel
+        self.mean = mean
+        self.std = std
+
+        self.implicit = nn.Parameter(torch.empty(1, channel, 1, 1))
+        nn.init.normal_(self.implicit, mean=mean, std=self.std)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.implicit + x
+
+
+class ImplicitM(nn.Module):
+    """
+    Implement YOLOR - implicit knowledge (multiply), paper: https://arxiv.org/abs/2105.04206
+    """
+
+    def __init__(self, channel: int, mean: float = 1.0, std: float = 0.02):
+        super().__init__()
+        self.channel = channel
+        self.mean = mean
+        self.std = std
+
+        self.implicit = nn.Parameter(torch.empty(1, channel, 1, 1))
+        nn.init.normal_(self.implicit, mean=self.mean, std=self.std)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.implicit * x
+
+
+class UpSample(nn.Module):
+    def __init__(self, **kwargs):
+        super().__init__()
+        self.UpSample = nn.Upsample(**kwargs)
+
+    def forward(self, x):
+        return self.UpSample(x)
+
+
+class IDetect(nn.Module):
+    """
+    #TODO: Add Detect class, change IDetect base class
+    """
+
+    stride = None  # strides computed during build
+    export = False  # onnx export
+    end2end = False
+    include_nms = False
+    concat = False
+
+    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
+        super(IDetect, self).__init__()
+        self.nc = nc  # number of classes
+        self.no = nc + 5  # number of outputs per anchor
+        self.nl = len(anchors)  # number of detection layers
+        self.na = len(anchors[0]) // 2  # number of anchors
+        self.grid = [torch.zeros(1)] * self.nl  # init grid
+        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
+        self.register_buffer("anchors", a)  # shape(nl,na,2)
+        self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+
+        self.ia = nn.ModuleList(ImplicitA(x) for x in ch)
+        self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch)
+
+    def forward(self, x):
+        # x = x.copy()  # for profiling
+        z = []  # inference output
+        self.training |= self.export
+        for i in range(self.nl):
+            x[i] = self.m[i](self.ia[i](x[i]))  # conv
+            x[i] = self.im[i](x[i])
+            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
+            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+
+            if not self.training:  # inference
+                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
+                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
+
+                y = x[i].sigmoid()
+                y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
+                z.append(y.view(bs, -1, self.no))
+
+        return x if self.training else (torch.cat(z, 1), x)
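
Note: a quick shape check of the new blocks, as a sketch outside the commit; it assumes the unchanged Conv.forward applies conv -> bn -> act with auto padding on, so the 1x1/3x3 convs here preserve spatial size:

import torch

from model.module import IDetect, MaxPool, SPPCSPConv, UpSample

x = torch.randn(1, 1024, 20, 20)
print(SPPCSPConv(1024, 512)(x).shape)                     # torch.Size([1, 512, 20, 20]): SPP keeps spatial size
print(MaxPool()(x).shape)                                 # torch.Size([1, 1024, 10, 10]): stride-2 downsample
print(UpSample(scale_factor=2, mode="nearest")(x).shape)  # torch.Size([1, 1024, 40, 40])

# IDetect in training mode returns one raw map per scale, reshaped to
# (batch, num_anchors, ny, nx, nc + 5); one scale with 3 anchor pairs here.
det = IDetect(nc=80, anchors=([12, 16, 19, 36, 40, 28],), ch=(1024,))
det.train()
print(det([x])[0].shape)  # torch.Size([1, 3, 20, 20, 85])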
model/yolo.py CHANGED
@@ -1,15 +1,31 @@
+import inspect
+from typing import Any, Dict, List, Union
+
+import torch
 import torch.nn as nn
 from loguru import logger
-from typing import Dict, Any
+
+from model import module
+from utils.tools import load_model_cfg
+
+
+def get_layer_map():
+    """
+    Dynamically generates a dictionary mapping class names to classes,
+    filtering to include only those that are subclasses of nn.Module,
+    ensuring they are relevant neural network layers.
+    """
+    layer_map = {}
+    for name, obj in inspect.getmembers(module, inspect.isclass):
+        if issubclass(obj, nn.Module) and obj is not nn.Module:
+            layer_map[name] = obj
+    return layer_map
 
 
 class YOLO(nn.Module):
     """
     A preliminary YOLO (You Only Look Once) model class still under development.
 
-    This class is intended to define a YOLO model for object detection tasks. It is
-    currently not implemented and serves as a placeholder for future development.
-
     Parameters:
         model_cfg: Configuration for the YOLO model. Expected to define the layers,
             parameters, and any other relevant configuration details.
@@ -17,9 +33,70 @@ class YOLO(nn.Module):
 
     def __init__(self, model_cfg: Dict[str, Any]):
        super(YOLO, self).__init__()
-        # Placeholder for initialization logic
-        print(model_cfg)
-        raise NotImplementedError("Constructor not implemented.")
+        self.nc = model_cfg["nc"]
+        self.layer_map = get_layer_map()  # Get the map Dict[str: Module]
+        self.build_model(model_cfg["model"])
+
+    def build_model(self, model_arch: Dict[str, List[Dict[str, Dict[str, Dict]]]]):
+        model_list = nn.ModuleList()
+        output_dim = [3]
+        layer_indices_by_tag = {}
+
+        for arch_name, arch in model_arch.items():
+            logger.info(f"🏗️ Building model-{arch_name}")
+            for layer_idx, layer_spec in enumerate(arch, start=1):
+                layer_type, layer_info = next(iter(layer_spec.items()))
+                layer_args = layer_info.get("args", {})
+                source = layer_info.get("source", -1)
+
+                if isinstance(source, str):
+                    source = layer_indices_by_tag[source]
+                if "Conv" in layer_type:
+                    layer_args["in_channels"] = output_dim[source]
+                if "Detect" in layer_type:
+                    layer_args["nc"] = self.nc
+                    layer_args["ch"] = [output_dim[idx] for idx in source]
+
+                layer = self.create_layer(layer_type, source, **layer_args)
+                model_list.append(layer)
+
+                if "tags" in layer_info:
+                    if layer_info["tags"] in layer_indices_by_tag:
+                        raise ValueError(f"Duplicate tag '{layer_info['tags']}' found.")
+                    layer_indices_by_tag[layer_info["tags"]] = layer_idx
+
+                out_channels = self.get_out_channels(layer_type, layer_args, output_dim, source)
+                output_dim.append(out_channels)
+        self.model = model_list
+
+    def forward(self, x):
+        y = [x]
+        for layer in self.model:
+            if isinstance(layer.source, list):
+                model_input = [y[idx] for idx in layer.source]
+            else:
+                model_input = y[layer.source]
+            x = layer(model_input)
+            y.append(x)
+        return x
+
+    def get_out_channels(self, layer_type: str, layer_args: dict, output_dim: list, source: Union[int, list]):
+        if "Conv" in layer_type:
+            return layer_args["out_channels"]
+        if layer_type in ["MaxPool", "UpSample"]:
+            return output_dim[source]
+        if layer_type == "Concat":
+            return sum(output_dim[idx] for idx in source)
+        if layer_type == "IDetect":
+            return None
+
+    def create_layer(self, layer_type: str, source: Union[int, list], **kwargs):
+        if layer_type in self.layer_map:
+            layer = self.layer_map[layer_type](**kwargs)
+            layer.source = source
+            return layer
+        else:
+            raise ValueError(f"Unsupported layer type: {layer_type}")
 
 
 def get_model(model_cfg: dict) -> YOLO:
@@ -32,4 +109,11 @@ def get_model(model_cfg: dict) -> YOLO:
         YOLO: An instance of the model defined by the given configuration.
     """
     model = YOLO(model_cfg)
+    logger.info("✅ Success load model")
     return model
+
+
+if __name__ == "__main__":
+    model_cfg = load_model_cfg("v7-base")
+
+    YOLO(model_cfg)
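
Note: get_layer_map keys the modules by class name, which is the contract that ties the layer_type strings in v7-base.yaml to the classes in model/module.py; create_layer is then just a keyword-argument call through that map. One consequence worth noting: get_out_channels returns None for IDetect, so output_dim ends with a None entry, which is harmless only because no later layer sources the detector's output. A small usage sketch (not part of the commit):

import torch

from model.yolo import get_layer_map

layer_map = get_layer_map()
print("Conv" in layer_map, "MaxPool" in layer_map, "IDetect" in layer_map)  # True True True

# Instantiate by name, the same way YOLO.create_layer does:
layer = layer_map["Conv"](in_channels=3, out_channels=32, kernel_size=3)
print(layer(torch.randn(1, 3, 64, 64)).shape)  # torch.Size([1, 32, 64, 64])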
train.py CHANGED
@@ -4,11 +4,14 @@ from model.yolo import get_model
 from utils.tools import load_model_cfg, custom_logger
 import hydra
 from config.config import Config
+from omegaconf import OmegaConf
 
 
 @hydra.main(config_path="config", config_name="config", version_base=None)
 def main(cfg: Config):
+    OmegaConf.set_struct(cfg, False)
     model = get_model(cfg.model)
+    logger.info("Success load model")
 
 
 if __name__ == "__main__":
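
Note: OmegaConf.set_struct(cfg, False) is likely needed because YOLO.build_model assigns injected keys (in_channels, nc, ch) into the config's args nodes, which struct mode rejects as unknown fields. Also, main() now calls logger.info but this diff adds no logger import to train.py, so presumably "from loguru import logger" is still needed. A minimal sketch of the struct-mode behavior (hypothetical miniature config):

from omegaconf import OmegaConf

cfg = OmegaConf.create({"model": {"backbone": [{"Conv": {"args": {"out_channels": 32}}}]}})

OmegaConf.set_struct(cfg, True)
try:
    cfg.model.backbone[0].Conv.args.in_channels = 3  # unknown key: rejected in struct mode
except Exception as err:
    print(type(err).__name__)  # ConfigAttributeError

OmegaConf.set_struct(cfg, False)
cfg.model.backbone[0].Conv.args.in_channels = 3  # allowed: build_model relies on this
print(cfg.model.backbone[0].Conv.args.in_channels)  # 3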