Spaces:

henry000
/

YOLO

Running

App Files Files Community

henry000 committed on Jul 6, 2024

Commit

d13852b

1 Parent(s): 68d5954

✨ [New] YOLOv7 structure! enable build model

Browse files

Files changed (2) hide show

yolo/config/model/v7-base.yaml +26 -17
yolo/model/module.py +90 -1

yolo/config/model/v7-base.yaml CHANGED Viewed

@@ -1,11 +1,17 @@
 anchor:
-  reg_max: 16
   strides: [8, 16, 32]
 model:
   backbone:
   - Conv:
       args: {out_channels: 32, kernel_size: 3}
   - Conv:
       args: {out_channels: 64, kernel_size: 3, stride: 2}
   - Conv:
@@ -55,7 +61,7 @@ model:
       args: {out_channels: 128, kernel_size: 3}
   - Concat:
       source: [-1, -3, -5, -6]
-      tags: 8x
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - Pool:
@@ -86,7 +92,7 @@ model:
       source: [-1, -3, -5, -6]
   - Conv:
       args: {out_channels: 1024, kernel_size: 1}
-      tags: 16x
   - Pool:
       args: {padding: 0}
   - Conv:
@@ -115,17 +121,18 @@ model:
       source: [-1, -3, -5, -6]
   - Conv:
       args: {out_channels: 1024, kernel_size: 1}
-      tags: 32x
   head:
   - SPPCSPConv:
       args: {out_channels: 512}
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - UpSample:
       args: {scale_factor: 2}
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
-      source: 16x
   - Concat:
       source: [-1, -2]
   - Conv:
@@ -145,13 +152,14 @@ model:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
   - UpSample:
       args: {scale_factor: 2}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
-      source: 8x
   - Concat:
       source: [-1, -2]
   - Conv:
@@ -171,6 +179,7 @@ model:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
   - Pool:
       args: {padding: 0}
   - Conv:
@@ -181,7 +190,7 @@ model:
   - Conv:
       args: {out_channels: 128, kernel_size: 3, stride: 2}
   - Concat:
-      source: [-1, -3, 63]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
@@ -199,6 +208,7 @@ model:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Pool:
       args: {padding: 0}
   - Conv:
@@ -209,7 +219,7 @@ model:
   - Conv:
       args: {out_channels: 256, kernel_size: 3, stride: 2}
   - Concat:
-      source: [-1, -3, 51]
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - Conv:
@@ -227,20 +237,19 @@ model:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - RepConv:
       args: {out_channels: 256}
-      source: 75
   - RepConv:
       args: {out_channels: 512}
-      source: 88
   - RepConv:
       args: {out_channels: 1024}
-      source: 101
-  - IDetect:
       args:
-        anchors:
-            - [12,16, 19,36, 40,28]  # P3/8
-            - [36,75, 76,55, 72,146]  # P4/16
-            - [142,110, 192,243, 459,401]  # P5/32
-      source: [102, 103, 104]
       output: True

+name: v7-base
 anchor:
+  anchor:
+  - [12,16, 19,36, 40,28]  # P3/8
+  - [36,75, 76,55, 72,146]  # P4/16
+  - [142,110, 192,243, 459,401]  # P5/32
   strides: [8, 16, 32]
 model:
   backbone:
   - Conv:
       args: {out_channels: 32, kernel_size: 3}
+      source: 0
   - Conv:
       args: {out_channels: 64, kernel_size: 3, stride: 2}
   - Conv:
       args: {out_channels: 128, kernel_size: 3}
   - Concat:
       source: [-1, -3, -5, -6]
+      tags: B3
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - Pool:
       source: [-1, -3, -5, -6]
   - Conv:
       args: {out_channels: 1024, kernel_size: 1}
+      tags: B4
   - Pool:
       args: {padding: 0}
   - Conv:
       source: [-1, -3, -5, -6]
   - Conv:
       args: {out_channels: 1024, kernel_size: 1}
+      tags: B5
   head:
   - SPPCSPConv:
       args: {out_channels: 512}
+      tags: N3
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - UpSample:
       args: {scale_factor: 2}
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
+      source: B4
   - Concat:
       source: [-1, -2]
   - Conv:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
+      tags: N2
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
   - UpSample:
       args: {scale_factor: 2}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
+      source: B3
   - Concat:
       source: [-1, -2]
   - Conv:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
+      tags: P3
   - Pool:
       args: {padding: 0}
   - Conv:
   - Conv:
       args: {out_channels: 128, kernel_size: 3, stride: 2}
   - Concat:
+      source: [-1, -3, N2]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
+      tags: P4
   - Pool:
       args: {padding: 0}
   - Conv:
   - Conv:
       args: {out_channels: 256, kernel_size: 3, stride: 2}
   - Concat:
+      source: [-1, -3, N3]
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - Conv:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
+      tags: P5
   - RepConv:
       args: {out_channels: 256}
+      source: P3
   - RepConv:
       args: {out_channels: 512}
+      source: P4
   - RepConv:
       args: {out_channels: 1024}
+      source: P5
+  - MultiheadDetection:
       args:
+        version: v7
+      source: [-3, -2, -1]
       output: True
+      tags: Main

yolo/model/module.py CHANGED Viewed

@@ -91,13 +91,40 @@ class Detection(nn.Module):
         return class_x, anchor_x, vector_x
 class MultiheadDetection(nn.Module):
     """Mutlihead Detection module for Dual detect or Triple detect"""
     def __init__(self, in_channels: List[int], num_classes: int, **head_kwargs):
         super().__init__()
         self.heads = nn.ModuleList(
-            [Detection((in_channels[0], in_channel), num_classes, **head_kwargs) for in_channel in in_channels]
         )
     def forward(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]:
@@ -320,6 +347,32 @@ class CBLinear(nn.Module):
         return x.split(self.out_channels, dim=1)
 class SPPELAN(nn.Module):
     """SPPELAN module comprising multiple pooling and convolution layers."""
@@ -360,3 +413,39 @@ class CBFuse(nn.Module):
         res = [F.interpolate(x[pick_id], size=target_size, mode=self.mode) for pick_id, x in zip(self.idx, x_list)]
         out = torch.stack(res + [target]).sum(dim=0)
         return out

         return class_x, anchor_x, vector_x
class IDetection(nn.Module):
    """Detection head for one feature level, wrapped in implicit layers.

    The input goes through an additive implicit layer, then a 1x1
    convolution, then a multiplicative implicit layer.

    Args:
        in_channels: Either a plain channel count, or a tuple whose element
            at index 1 is the channel count of the feature map fed to this
            head (the element at index 0 is ignored here).
        num_classes: Number of classes. Each anchor predicts
            ``num_classes + 5`` values (presumably 4 box terms plus
            1 objectness score -- confirm against the loss/decoder).
        *args: Accepted for signature compatibility and ignored.
        anchor_num: Number of anchors predicted per spatial location.
        **kwargs: Accepted for signature compatibility and ignored.
    """

    def __init__(self, in_channels: Tuple[int], num_classes: int, *args, anchor_num: int = 3, **kwargs):
        super().__init__()
        # MultiheadDetection hands over a (first_level, this_level) pair;
        # only the channel count of this level matters for the head.
        feature_channels = in_channels[1] if isinstance(in_channels, tuple) else in_channels
        head_channels = anchor_num * (num_classes + 5)

        # Submodules are created in the same order as before so parameter
        # registration (and random initialisation order) is unchanged.
        self.head_conv = nn.Conv2d(feature_channels, head_channels, 1)
        self.implicit_a = ImplicitA(feature_channels)
        self.implicit_m = ImplicitM(head_channels)

    def forward(self, x):
        """Return the raw prediction map with ``anchor_num * (num_classes + 5)`` channels."""
        shifted = self.implicit_a(x)
        projected = self.head_conv(shifted)
        return self.implicit_m(projected)
 class MultiheadDetection(nn.Module):
     """Mutlihead Detection module for Dual detect or Triple detect"""
     def __init__(self, in_channels: List[int], num_classes: int, **head_kwargs):
         super().__init__()
+        DetectionHead = Detection
+        if head_kwargs.pop("version", None) == "v7":
+            DetectionHead = IDetection
         self.heads = nn.ModuleList(
+            [DetectionHead((in_channels[0], in_channel), num_classes, **head_kwargs) for in_channel in in_channels]
         )
     def forward(self, x_list: List[torch.Tensor]) -> List[torch.Tensor]:
         return x.split(self.out_channels, dim=1)
class SPPCSPConv(nn.Module):
    """Spatial-pyramid-pooling block with a cross-stage-partial shortcut.

    CSP reference: https://github.com/WongKinYiu/CrossStagePartialNetworks

    Two branches are computed from the input:

    * main branch: ``pre_conv`` -> chain of pooling layers -> channel
      concatenation of all intermediate maps -> ``post_conv``
    * shortcut branch: a single 1x1 ``short_conv``

    The branches are concatenated along the channel axis and fused by
    ``merge_conv``.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of channels of the output feature map.
        expand: Scale factor for the hidden width; the hidden width is
            ``int(2 * out_channels * expand)``.
        kernel_sizes: Kernel size of each pooling layer. Any number of
            entries is accepted; the concatenated width fed to ``post_conv``
            is derived from its length.
    """

    def __init__(self, in_channels: int, out_channels: int, expand: float = 0.5, kernel_sizes: Tuple[int] = (5, 9, 13)):
        super().__init__()
        kernel_sizes = tuple(kernel_sizes)
        neck_channels = int(2 * out_channels * expand)
        self.pre_conv = nn.Sequential(
            Conv(in_channels, neck_channels, 1),
            Conv(neck_channels, neck_channels, 3),
            Conv(neck_channels, neck_channels, 1),
        )
        self.short_conv = Conv(in_channels, neck_channels, 1)
        self.pools = nn.ModuleList([Pool(kernel_size=kernel_size, stride=1) for kernel_size in kernel_sizes])
        # The concatenation holds the pre_conv output plus one map per
        # pooling layer, so its width follows the number of kernel sizes
        # (4 * neck_channels for the default three kernels).
        pyramid_channels = (len(kernel_sizes) + 1) * neck_channels
        self.post_conv = nn.Sequential(
            Conv(pyramid_channels, neck_channels, 1),
            Conv(neck_channels, neck_channels, 3),
        )
        self.merge_conv = Conv(2 * neck_channels, out_channels, 1)

    def forward(self, x):
        """Run both branches on ``x`` and return the fused feature map."""
        features = [self.pre_conv(x)]
        # NOTE(review): each pooling layer consumes the previous pooling
        # layer's output (a cascade), not the pre_conv output directly.
        # Confirm this is intended rather than parallel pooling.
        for pool in self.pools:
            features.append(pool(features[-1]))
        pyramid = torch.cat(features, dim=1)
        main_branch = self.post_conv(pyramid)
        shortcut_branch = self.short_conv(x)
        return self.merge_conv(torch.cat((main_branch, shortcut_branch), dim=1))
 class SPPELAN(nn.Module):
     """SPPELAN module comprising multiple pooling and convolution layers."""
         res = [F.interpolate(x[pick_id], size=target_size, mode=self.mode) for pick_id, x in zip(self.idx, x_list)]
         out = torch.stack(res + [target]).sum(dim=0)
         return out
class ImplicitA(nn.Module):
    """Additive implicit-knowledge layer (YOLOR, https://arxiv.org/abs/2105.04206).

    Owns a learnable tensor of shape ``(1, channel, 1, 1)`` that is sampled
    from a normal distribution at construction time and added to the input.

    Args:
        channel: Size of the channel axis of the learnable tensor.
        mean: Mean of the normal distribution used for initialisation.
        std: Standard deviation of the normal distribution used for
            initialisation.
    """

    def __init__(self, channel: int, mean: float = 0.0, std: float = 0.02):
        super().__init__()
        self.channel, self.mean, self.std = channel, mean, std

        # Sample first, then register: same random draw, same Parameter.
        initial = torch.empty(1, channel, 1, 1)
        nn.init.normal_(initial, mean=mean, std=std)
        self.implicit = nn.Parameter(initial)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` shifted by the learned tensor (broadcast add)."""
        return self.implicit + x
class ImplicitM(nn.Module):
    """Multiplicative implicit-knowledge layer (YOLOR, https://arxiv.org/abs/2105.04206).

    Owns a learnable tensor of shape ``(1, channel, 1, 1)`` that is sampled
    from a normal distribution at construction time and multiplied with the
    input.

    Args:
        channel: Size of the channel axis of the learnable tensor.
        mean: Mean of the normal distribution used for initialisation.
        std: Standard deviation of the normal distribution used for
            initialisation.
    """

    def __init__(self, channel: int, mean: float = 1.0, std: float = 0.02):
        super().__init__()
        self.channel, self.mean, self.std = channel, mean, std

        # Sample first, then register: same random draw, same Parameter.
        initial = torch.empty(1, channel, 1, 1)
        nn.init.normal_(initial, mean=mean, std=std)
        self.implicit = nn.Parameter(initial)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` scaled by the learned tensor (broadcast multiply)."""
        return self.implicit * x