henry000 committed on
Commit
877244a
·
2 Parent(s): 814608f 592f75d

🔀 [Merge] branch 'MODEL' into TEST

Browse files
yolo/config/model/v9-c-seg.yaml ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: v9-c-seg
2
+
3
+ anchor:
4
+ reg_max: 16
5
+ strides: [8, 16, 32]
6
+
7
+ model:
8
+ backbone:
9
+ - Conv:
10
+ args: {out_channels: 64, kernel_size: 3, stride: 2}
11
+ source: 0
12
+ - Conv:
13
+ args: {out_channels: 128, kernel_size: 3, stride: 2}
14
+ - RepNCSPELAN:
15
+ args: {out_channels: 256, part_channels: 128}
16
+
17
+ - ADown:
18
+ args: {out_channels: 256}
19
+ - RepNCSPELAN:
20
+ args: {out_channels: 512, part_channels: 256}
21
+ tags: B3
22
+
23
+ - ADown:
24
+ args: {out_channels: 512}
25
+ - RepNCSPELAN:
26
+ args: {out_channels: 512, part_channels: 512}
27
+ tags: B4
28
+
29
+ - ADown:
30
+ args: {out_channels: 512}
31
+ - RepNCSPELAN:
32
+ args: {out_channels: 512, part_channels: 512}
33
+ tags: B5
34
+
35
+ neck:
36
+ - SPPELAN:
37
+ args: {out_channels: 512}
38
+ tags: N3
39
+
40
+ - UpSample:
41
+ args: {scale_factor: 2, mode: nearest}
42
+ - Concat:
43
+ source: [-1, B4]
44
+ - RepNCSPELAN:
45
+ args: {out_channels: 512, part_channels: 512}
46
+ tags: N4
47
+
48
+ - UpSample:
49
+ args: {scale_factor: 2, mode: nearest}
50
+ - Concat:
51
+ source: [-1, B3]
52
+
53
+ head:
54
+ - RepNCSPELAN:
55
+ args: {out_channels: 256, part_channels: 256}
56
+ tags: P3
57
+
58
+ - ADown:
59
+ args: {out_channels: 256}
60
+ - Concat:
61
+ source: [-1, N4]
62
+ - RepNCSPELAN:
63
+ args: {out_channels: 512, part_channels: 512}
64
+ tags: P4
65
+
66
+ - ADown:
67
+ args: {out_channels: 512}
68
+ - Concat:
69
+ source: [-1, N3]
70
+ - RepNCSPELAN:
71
+ args: {out_channels: 512, part_channels: 512}
72
+ tags: P5
73
+
74
+ detection:
75
+ - RepNCSPELAN:
76
+ source: P3
77
+ args: {out_channels: 256, part_channels: 256, csp_args: {repeat_num: 2}}
78
+ - UpSample:
79
+ args: {scale_factor: 2, mode: nearest}
80
+ - Conv:
81
+ args: {out_channels: 256, kernel_size: 3}
82
+
83
+ - MultiheadSegmentation:
84
+ source: [P3, P4, P5, -1]
85
+ args: {num_maskes: 32}
86
+ tags: Main
87
+ output: True
88
+
89
+ auxiliary:
90
+ - CBLinear:
91
+ source: B3
92
+ args: {out_channels: [256]}
93
+ tags: R3
94
+ - CBLinear:
95
+ source: B4
96
+ args: {out_channels: [256, 512]}
97
+ tags: R4
98
+ - CBLinear:
99
+ source: B5
100
+ args: {out_channels: [256, 512, 512]}
101
+ tags: R5
102
+
103
+ - Conv:
104
+ args: {out_channels: 64, kernel_size: 3, stride: 2}
105
+ source: 0
106
+ - Conv:
107
+ args: {out_channels: 128, kernel_size: 3, stride: 2}
108
+ - RepNCSPELAN:
109
+ args: {out_channels: 256, part_channels: 128}
110
+
111
+ - ADown:
112
+ args: {out_channels: 256}
113
+ - CBFuse:
114
+ source: [R3, R4, R5, -1]
115
+ args: {index: [0, 0, 0]}
116
+ - RepNCSPELAN:
117
+ args: {out_channels: 512, part_channels: 256}
118
+ tags: A3
119
+
120
+ - ADown:
121
+ args: {out_channels: 512}
122
+ - CBFuse:
123
+ source: [R4, R5, -1]
124
+ args: {index: [1, 1]}
125
+ - RepNCSPELAN:
126
+ args: {out_channels: 512, part_channels: 512}
127
+ tags: A4
128
+
129
+ - ADown:
130
+ args: {out_channels: 512}
131
+ - CBFuse:
132
+ source: [R5, -1]
133
+ args: {index: [2]}
134
+ - RepNCSPELAN:
135
+ args: {out_channels: 512, part_channels: 512}
136
+ tags: A5
137
+
138
+ - RepNCSPELAN:
139
+ source: A3
140
+ args: {out_channels: 512, part_channels: 256, csp_args: {repeat_num: 2}}
141
+
142
+ - UpSample:
143
+ args: {scale_factor: 2, mode: nearest}
144
+ - Conv:
145
+ args: {out_channels: 256, kernel_size: 3}
146
+
147
+ - MultiheadSegmentation:
148
+ source: [A3, A4, A5, -1]
149
+ args: {num_maskes: 32}
150
+ tags: AUX
151
+ output: True
yolo/model/module.py CHANGED
@@ -81,7 +81,7 @@ class Detection(nn.Module):
81
  self.anc2vec = Anchor2Vec(reg_max=reg_max)
82
 
83
  self.anchor_conv[-1].bias.data.fill_(1.0)
84
- self.class_conv[-1].bias.data.fill_(-10)
85
 
86
  def forward(self, x: Tensor) -> Tuple[Tensor]:
87
  anchor_x = self.anchor_conv(x)
@@ -130,6 +130,38 @@ class MultiheadDetection(nn.Module):
130
  return [head(x) for x, head in zip(x_list, self.heads)]
131
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  class Anchor2Vec(nn.Module):
134
  def __init__(self, reg_max: int = 16) -> None:
135
  super().__init__()
 
81
  self.anc2vec = Anchor2Vec(reg_max=reg_max)
82
 
83
  self.anchor_conv[-1].bias.data.fill_(1.0)
84
+ self.class_conv[-1].bias.data.fill_(-10) # TODO: math.log(5 * 4 ** idx / 80 ** 3)
85
 
86
  def forward(self, x: Tensor) -> Tuple[Tensor]:
87
  anchor_x = self.anchor_conv(x)
 
130
  return [head(x) for x, head in zip(x_list, self.heads)]
131
 
132
 
133
class Segmentation(nn.Module):
    """Convolutional stack that maps one feature map to ``num_maskes`` channels.

    The stack is ``Conv(3x3) -> Conv(3x3) -> nn.Conv2d(1x1)``.

    Args:
        in_channels: A pair ``(first_neck, in_channels)``. ``first_neck`` is only
            used to size the hidden width; the second value is the channel count
            of the tensor passed to ``forward``.
        num_maskes: Number of output channels of the final 1x1 convolution.
            (The spelling is kept because it is the keyword used by the configs.)
    """

    def __init__(self, in_channels: Tuple[int, int], num_maskes: int):
        super().__init__()
        first_neck, in_channels = in_channels

        # Hidden width: a quarter of the reference width, never below num_maskes.
        mask_neck = max(first_neck // 4, num_maskes)
        self.mask_conv = nn.Sequential(
            Conv(in_channels, mask_neck, 3), Conv(mask_neck, mask_neck, 3), nn.Conv2d(mask_neck, num_maskes, 1)
        )

    def forward(self, x: Tensor) -> Tensor:
        """Apply the convolution stack and return the resulting single tensor."""
        return self.mask_conv(x)
146
+
147
+
148
class MultiheadSegmentation(nn.Module):
    """Multihead Segmentation module for Dual segment or Triple segment.

    The last entry of ``in_channels`` is the channel count of the prototype
    input; every entry before it gets its own ``Segmentation`` head.
    """

    def __init__(self, in_channels: List[int], num_classes: int, num_maskes: int, **head_kwargs):
        super().__init__()
        proto_channels = in_channels[-1]
        mask_channels = in_channels[:-1]
        first_neck = in_channels[0]

        # NOTE(review): ``self.detect`` is constructed here but is not called in
        # ``forward`` below -- confirm whether that is intended.
        self.detect = MultiheadDetection(mask_channels, num_classes, **head_kwargs)

        segment_heads = []
        for channels in mask_channels:
            segment_heads.append(Segmentation((first_neck, channels), num_maskes))
        self.heads = nn.ModuleList(segment_heads)
        # The final head is a 1x1 Conv over the prototype input.
        self.heads.append(Conv(proto_channels, num_maskes, 1))

    def forward(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]:
        """Run the i-th head on the i-th input; extra inputs or heads are ignored by ``zip``."""
        outputs = []
        for feature, head in zip(x_list, self.heads):
            outputs.append(head(feature))
        return outputs
163
+
164
+
165
  class Anchor2Vec(nn.Module):
166
  def __init__(self, reg_max: int = 16) -> None:
167
  super().__init__()
yolo/model/yolo.py CHANGED
@@ -45,7 +45,7 @@ class YOLO(nn.Module):
45
  # Find in channels
46
  if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
47
  layer_args["in_channels"] = output_dim[source]
48
- if "Detection" in layer_type:
49
  layer_args["in_channels"] = [output_dim[idx] for idx in source]
50
  layer_args["num_classes"] = self.num_classes
51
  layer_args["reg_max"] = self.reg_max
 
45
  # Find in channels
46
  if any(module in layer_type for module in ["Conv", "ELAN", "ADown", "AConv", "CBLinear"]):
47
  layer_args["in_channels"] = output_dim[source]
48
+ if "Detection" in layer_type or "Segmentation" in layer_type:
49
  layer_args["in_channels"] = [output_dim[idx] for idx in source]
50
  layer_args["num_classes"] = self.num_classes
51
  layer_args["reg_max"] = self.reg_max
yolo/tools/format_converters.py CHANGED
@@ -83,3 +83,55 @@ def convert_weight_v7(old_state_dict, new_state_dict):
83
  assert new_shape == old_shape, "Weight Shape Mismatch!! {old_key_name}"
84
  new_state_dict[new_key_name] = old_state_dict[old_key_name]
85
  return new_state_dict
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  assert new_shape == old_shape, "Weight Shape Mismatch!! {old_key_name}"
84
  new_state_dict[new_key_name] = old_state_dict[old_key_name]
85
  return new_state_dict
86
+
87
+
88
replace_dict = {"cv": "conv", ".m.": ".bottleneck."}

# Old head sub-module name -> (new layer index, new heads container, new conv name).
# The layer indices are hard-coded for one specific model layout.
_SEG_HEAD_LAYOUT = {
    "cv2": (44, "detect.heads", "anchor_conv"),
    "cv3": (44, "detect.heads", "class_conv"),
    "cv6": (44, "heads", "mask_conv"),
    "cv4": (25, "detect.heads", "anchor_conv"),
    "cv5": (25, "detect.heads", "class_conv"),
    "cv7": (25, "heads", "mask_conv"),
}


def convert_weight_seg(old_state_dict, new_state_dict):
    """Copy weights from ``old_state_dict`` into ``new_state_dict`` under renamed keys.

    Each old key is expected to look like ``model.<layer_idx>.<rest>``. The layer
    index is shifted by a running offset (``-1`` at first, ``+3`` from old layer 23
    onwards, ``-19`` from old layer 41 onwards) and the substrings in
    ``replace_dict`` are substituted. If the resulting key is not present in
    ``new_state_dict``, the key is treated as a head weight and remapped through
    ``_SEG_HEAD_LAYOUT``. ``proto`` and ``dfl`` weights are skipped.

    Args:
        old_state_dict: Mapping from old weight name to a tensor-like object
            exposing ``.shape``.
        new_state_dict: Mapping from new weight name to a tensor-like object
            exposing ``.shape``; updated in place.

    Returns:
        ``new_state_dict`` (the same object), with matching entries replaced by
        the values from ``old_state_dict``.

    Raises:
        ValueError, IndexError: If an old key does not have an integer as its
            second dot-separated component.
    """
    diff = -1
    for old_weight_name, old_weight in old_state_dict.items():
        old_idx = int(old_weight_name.split(".")[1])
        # The offset is sticky: once changed it applies to all following keys.
        if old_idx == 23:
            diff = 3
        elif old_idx == 41:
            diff = -19
        new_idx = old_idx + diff

        # count=1: only the leading layer index may be shifted. Without it an inner
        # sub-module index that equals the layer index would be rewritten too.
        new_weight_name = old_weight_name.replace(f".{old_idx}.", f".{new_idx}.", 1)
        for key, val in replace_dict.items():
            new_weight_name = new_weight_name.replace(key, val)

        if new_weight_name not in new_state_dict:
            parts = old_weight_name.split(".")
            conv_name = parts[2] if len(parts) > 2 else ""
            # Prototype and DFL weights have no mapping here and are skipped.
            if "proto" in conv_name or "dfl" in old_weight_name:
                continue
            if len(parts) < 4 or conv_name not in _SEG_HEAD_LAYOUT:
                # Unknown sub-module: report it, and never build a name from stale
                # values left over from a previous iteration.
                print(f"new: {new_weight_name}, old: {old_weight_name}")
                print(f"missing {old_weight.shape}")
                continue
            layer_idx, heads, conv_task = _SEG_HEAD_LAYOUT[conv_name]
            conv_idx, details = parts[3], parts[4:]
            new_weight_name = ".".join(["model", str(layer_idx), heads, conv_idx, conv_task, *details])

        if new_weight_name not in new_state_dict:
            # Report and skip; indexing new_state_dict here would raise KeyError.
            print(f"new: {new_weight_name}, old: {old_weight_name}")
            print(f"missing {old_weight.shape}")
            continue

        if new_state_dict[new_weight_name].shape != old_weight.shape:
            # Shape mismatches are reported but the weight is still copied,
            # which is the original best-effort behaviour.
            print(f"new: {new_weight_name}, old: {old_weight_name}")
            print(f"{new_state_dict[new_weight_name].shape} {old_weight.shape}")
        new_state_dict[new_weight_name] = old_weight
    return new_state_dict