✨ [New] Add a classification head!
Browse files
- yolo/config/model/v9-c-cls.yaml +36 -0
- yolo/model/module.py +15 -0
- yolo/model/yolo.py +5 -2
yolo/config/model/v9-c-cls.yaml
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: v9-c-cls
|
2 |
+
|
3 |
+
anchor:
|
4 |
+
reg_max: 16
|
5 |
+
strides: [8, 16, 32]
|
6 |
+
|
7 |
+
model:
|
8 |
+
backbone:
|
9 |
+
- Conv:
|
10 |
+
args: {out_channels: 64, kernel_size: 3, stride: 2}
|
11 |
+
source: 0
|
12 |
+
- Conv:
|
13 |
+
args: {out_channels: 128, kernel_size: 3, stride: 2}
|
14 |
+
- RepNCSPELAN:
|
15 |
+
args: {out_channels: 256, part_channels: 128}
|
16 |
+
|
17 |
+
- ADown:
|
18 |
+
args: {out_channels: 256}
|
19 |
+
- RepNCSPELAN:
|
20 |
+
args: {out_channels: 512, part_channels: 256}
|
21 |
+
|
22 |
+
- ADown:
|
23 |
+
args: {out_channels: 512}
|
24 |
+
- RepNCSPELAN:
|
25 |
+
args: {out_channels: 512, part_channels: 512}
|
26 |
+
|
27 |
+
- ADown:
|
28 |
+
args: {out_channels: 512}
|
29 |
+
- RepNCSPELAN:
|
30 |
+
args: {out_channels: 512, part_channels: 512}
|
31 |
+
|
32 |
+
detection:
|
33 |
+
- Classification:
|
34 |
+
source: -1
|
35 |
+
tags: Main
|
36 |
+
output: True
|
yolo/model/module.py
CHANGED
@@ -130,6 +130,7 @@ class MultiheadDetection(nn.Module):
|
|
130 |
return [head(x) for x, head in zip(x_list, self.heads)]
|
131 |
|
132 |
|
|
|
133 |
class Segmentation(nn.Module):
|
134 |
def __init__(self, in_channels: Tuple[int], num_maskes: int):
|
135 |
super().__init__()
|
@@ -176,6 +177,20 @@ class Anchor2Vec(nn.Module):
|
|
176 |
return anchor_x, vector_x
|
177 |
|
178 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
# ----------- Backbone Class ----------- #
|
180 |
class RepConv(nn.Module):
|
181 |
"""A convolutional block that combines two convolution layers (kernel and point-wise)."""
|
|
|
130 |
return [head(x) for x, head in zip(x_list, self.heads)]
|
131 |
|
132 |
|
133 |
+
# ----------- Segmentation Class ----------- #
|
134 |
class Segmentation(nn.Module):
|
135 |
def __init__(self, in_channels: Tuple[int], num_maskes: int):
|
136 |
super().__init__()
|
|
|
177 |
return anchor_x, vector_x
|
178 |
|
179 |
|
180 |
+
# ----------- Classification Class ----------- #
|
181 |
+
class Classification(nn.Module):
    """Classification head: ``Conv`` neck -> global average pool -> linear layer.

    Args:
        in_channel: Number of channels of the incoming feature map.
        num_classes: Number of logits produced per sample.
        neck_channels: Output channels of the ``Conv`` neck applied before
            pooling (keyword-only, defaults to 1024).
        **head_args: Additional keyword arguments are accepted and ignored, so
            a caller that passes head options this class does not need will
            not raise a ``TypeError``.
    """

    def __init__(self, in_channel: int, num_classes: int, *, neck_channels=1024, **head_args):
        super().__init__()
        # Third positional argument of Conv is 1 -- presumably the kernel size
        # (point-wise convolution); confirm against the Conv definition.
        self.conv = Conv(in_channel, neck_channels, 1)
        # Collapse each channel's spatial map to a single value.
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.head = nn.Linear(neck_channels, num_classes)

    def forward(self, x: Tensor) -> Tensor:
        """Return the class logits for ``x``.

        Assumes ``x`` is a batched feature map of shape
        ``(batch, in_channel, H, W)`` -- TODO confirm with the caller. Under
        that assumption the result has shape ``(batch, num_classes)``.
        """
        x = self.pool(self.conv(x))
        # Drop the pooled 1x1 spatial dims so the linear layer sees (batch, neck_channels).
        x = self.head(x.flatten(start_dim=1))
        return x
|
192 |
+
|
193 |
+
|
194 |
# ----------- Backbone Class ----------- #
|
195 |
class RepConv(nn.Module):
|
196 |
"""A convolutional block that combines two convolution layers (kernel and point-wise)."""
|
yolo/model/yolo.py
CHANGED
@@ -46,8 +46,11 @@ class YOLO(nn.Module):
|
|
46 |
# Find in channels
|
47 |
if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
|
48 |
layer_args["in_channels"] = output_dim[source]
|
49 |
-
if
|
50 |
-
|
|
|
|
|
|
|
51 |
layer_args["num_classes"] = self.num_classes
|
52 |
layer_args["reg_max"] = self.reg_max
|
53 |
|
|
|
46 |
# Find in channels
|
47 |
if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
|
48 |
layer_args["in_channels"] = output_dim[source]
|
49 |
+
if any(module in layer_type for module in ["Detection", "Segmentation", "Classification"]):
|
50 |
+
if isinstance(source, list):
|
51 |
+
layer_args["in_channels"] = [output_dim[idx] for idx in source]
|
52 |
+
else:
|
53 |
+
layer_args["in_channel"] = output_dim[source]
|
54 |
layer_args["num_classes"] = self.num_classes
|
55 |
layer_args["reg_max"] = self.reg_max
|
56 |
|