Spaces:

henry000
/

YOLO

Running

App Files Files Community

henry000 committed on Apr 22, 2024

Commit

787b81d

2 Parent(s): 5fed6e3 183312f

🔀 [Merge] branch 'MODEL' into SETUP

Browse files

Files changed (4) hide show

config/model/v7-base.yaml +26 -22
model/module.py +210 -48
model/yolo.py +91 -7
train.py +3 -0

config/model/v7-base.yaml CHANGED Viewed

@@ -1,5 +1,5 @@
-anchor:
-  [1, 2, 3]
 model:
   backbone:
   - Conv:
@@ -27,8 +27,8 @@ model:
       source: [-1, -3, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
-  - MP:
-      args: []
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
   - Conv:
@@ -56,8 +56,8 @@ model:
       tags: 8x
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
-  - MP:
-      args: []
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
@@ -85,8 +85,8 @@ model:
   - Conv:
       args: {out_channels: 1024, kernel_size: 1}
       tags: 16x
-  - MP:
-      args: []
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - Conv:
@@ -115,12 +115,12 @@ model:
       args: {out_channels: 1024, kernel_size: 1}
       tags: 32x
   head:
-  - SPPCSPC:
-      args: [512]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
-  - Upsample:
-      args: [None, 2, nearest]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
       source: 16x
@@ -145,8 +145,8 @@ model:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
-  - Upsample:
-      args: [None, 2, nearest]
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
       source: 8x
@@ -169,8 +169,8 @@ model:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
-  - MP:
-      args: []
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
   - Conv:
@@ -197,8 +197,8 @@ model:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
-  - MP:
-      args: []
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
@@ -226,14 +226,18 @@ model:
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - RepConv:
-      args: [256, 3, 1]
       source: 75
   - RepConv:
-      args: [512, 3, 1]
       source: 88
   - RepConv:
-      args: [1024, 3, 1]
       source: 101
   - IDetect:
-      args: [nc, anchors]
       source: [102, 103, 104]

+nc: 80
 model:
   backbone:
   - Conv:
       source: [-1, -3, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
+  - MaxPool:
+      args: {}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
   - Conv:
       tags: 8x
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
+  - MaxPool:
+      args: {}
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
   - Conv:
       args: {out_channels: 1024, kernel_size: 1}
       tags: 16x
+  - MaxPool:
+      args: {}
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - Conv:
       args: {out_channels: 1024, kernel_size: 1}
       tags: 32x
   head:
+  - SPPCSPConv:
+      args: {out_channels: 512}
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
+  - UpSample:
+      args: {scale_factor: 2}
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
       source: 16x
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
+  - UpSample:
+      args: {scale_factor: 2}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
       source: 8x
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
+  - MaxPool:
+      args: {}
   - Conv:
       args: {out_channels: 128, kernel_size: 1}
   - Conv:
       source: [-1, -2, -3, -4, -5, -6]
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
+  - MaxPool:
+      args: {}
   - Conv:
       args: {out_channels: 256, kernel_size: 1}
   - Conv:
   - Conv:
       args: {out_channels: 512, kernel_size: 1}
   - RepConv:
+      args: {out_channels: 256}
       source: 75
   - RepConv:
+      args: {out_channels: 512}
       source: 88
   - RepConv:
+      args: {out_channels: 1024}
       source: 101
   - IDetect:
+      args:
+        anchors:
+            - [12,16, 19,36, 40,28]  # P3/8
+            - [36,75, 76,55, 72,146]  # P4/16
+            - [142,110, 192,243, 459,401]  # P5/32
       source: [102, 103, 104]

model/module.py CHANGED Viewed

@@ -1,14 +1,25 @@
 import torch
 import torch.nn as nn
-# basic
 class Conv(nn.Module):
     # basic convolution
-    def __init__(self, in_channels, out_channels, kernel_size,
-                 stride=1, padding=0, dilation=1, groups=1, act=nn.ReLU(),
-                 bias=False, auto_padding=True, padding_mode='zeros'):
         super().__init__()
         # not yet handle the case when dilation is a tuple
@@ -18,7 +29,9 @@ class Conv(nn.Module):
             else:
                 padding = [(dilation * (k - 1) + 1) // 2 for k in kernel_size]
-        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, dilation=dilation, bias=bias)
         self.bn = nn.BatchNorm2d(out_channels)
         self.act = act if isinstance(act, nn.Module) else nn.Identity()
@@ -33,11 +46,9 @@ class Conv(nn.Module):
 # RepVGG
 class RepConv(nn.Module):
     # https://github.com/DingXiaoH/RepVGG
-    def __init__(self, in_channels, out_channels, kernel_size=3,
-                 stride=1, groups=1, act=nn.ReLU()):
         super().__init__()
@@ -56,11 +67,9 @@ class RepConv(nn.Module):
 # ResNet
 class Res(nn.Module):
     # ResNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.25):
         super().__init__()
@@ -75,8 +84,7 @@ class Res(nn.Module):
 class RepRes(nn.Module):
     # RepResNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.25):
         super().__init__()
@@ -91,14 +99,21 @@ class RepRes(nn.Module):
 class ConvBlock(nn.Module):
     # ConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else Conv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
@@ -107,14 +122,21 @@ class ConvBlock(nn.Module):
 class RepConvBlock(nn.Module):
     # ConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else RepConv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
@@ -123,14 +145,21 @@ class RepConvBlock(nn.Module):
 class ResConvBlock(nn.Module):
     # ResConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else Conv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
@@ -139,14 +168,21 @@ class ResConvBlock(nn.Module):
 class ResRepConvBlock(nn.Module):
     # ResConvBlock
-    def __init__(self, in_channels,
-                 repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
-        self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act) if repeat == 1 else RepConv(in_channels, h_channels, 3, 1, act=act)
-        self.cb = nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat-2))) if repeat > 2 else nn.Identity()
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
@@ -154,11 +190,9 @@ class ResRepConvBlock(nn.Module):
 # Darknet
 class Dark(nn.Module):
     # DarkNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.5):
         super().__init__()
@@ -172,8 +206,7 @@ class Dark(nn.Module):
 class RepDark(nn.Module):
     # RepDarkNet bottleneck
-    def __init__(self, in_channels, out_channels,
-                 groups=1, act=nn.ReLU(), ratio=0.5):
         super().__init__()
@@ -186,11 +219,9 @@ class RepDark(nn.Module):
 # CSPNet
 class CSP(nn.Module):
     # CSPNet
-    def __init__(self, in_channels, out_channels,
-                 repeat=1, cb_repeat=2, act=nn.ReLU(), ratio=1.0):
         super().__init__()
@@ -208,14 +239,15 @@ class CSP(nn.Module):
 class CSPDark(nn.Module):
     # CSPNet
-    def __init__(self, in_channels, out_channels,
-                 repeat=1, groups=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = in_channels // 2
         self.cv1 = Conv(in_channels, in_channels, 1, 1, act=act)
-        self.cb = nn.Sequential(*(Dark(h_channels, h_channels, groups=groups, act=act, ratio=ratio) for _ in range(repeat)))
         self.cv2 = Conv(2 * h_channels, out_channels, 1, 1, act=act)
     def forward(self, x):
@@ -226,18 +258,16 @@ class CSPDark(nn.Module):
 # ELAN
 class ELAN(nn.Module):
     # ELAN
-    def __init__(self, in_channels, out_channels, med_channels,
-                 elan_repeat=2, cb_repeat=2, ratio=1.0):
         super().__init__()
         h_channels = med_channels // 2
         self.cv1 = Conv(in_channels, med_channels, 1, 1)
         self.cb = nn.ModuleList(ConvBlock(h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
-        self.cv2 = Conv((2+elan_repeat) * h_channels, out_channels, 1, 1)
     def forward(self, x):
@@ -249,15 +279,14 @@ class ELAN(nn.Module):
 class CSPELAN(nn.Module):
     # ELAN
-    def __init__(self, in_channels, out_channels, med_channels,
-                 elan_repeat=2, cb_repeat=2, ratio=1.0):
         super().__init__()
         h_channels = med_channels // 2
         self.cv1 = Conv(in_channels, med_channels, 1, 1)
         self.cb = nn.ModuleList(CSP(h_channels, h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
-        self.cv2 = Conv((2+elan_repeat) * h_channels, out_channels, 1, 1)
     def forward(self, x):
@@ -265,3 +294,136 @@ class CSPELAN(nn.Module):
         y.extend((m(y[-1])) for m in self.cb)
         return self.cv2(torch.cat(y, 1))

 import torch
 import torch.nn as nn
+# basic
 class Conv(nn.Module):
     # basic convolution
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        dilation=1,
+        groups=1,
+        act=nn.ReLU(),
+        bias=False,
+        auto_padding=True,
+        padding_mode="zeros",
+    ):
         super().__init__()
         # not yet handle the case when dilation is a tuple
             else:
                 padding = [(dilation * (k - 1) + 1) // 2 for k in kernel_size]
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, kernel_size, stride, padding, groups=groups, dilation=dilation, bias=bias
+        )
         self.bn = nn.BatchNorm2d(out_channels)
         self.act = act if isinstance(act, nn.Module) else nn.Identity()
 # RepVGG
 class RepConv(nn.Module):
     # https://github.com/DingXiaoH/RepVGG
+    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, groups=1, act=nn.ReLU()):
         super().__init__()
 # ResNet
 class Res(nn.Module):
     # ResNet bottleneck
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):
         super().__init__()
 class RepRes(nn.Module):
     # RepResNet bottleneck
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):
         super().__init__()
 class ConvBlock(nn.Module):
     # ConvBlock
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else Conv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
 class RepConvBlock(nn.Module):
     # ConvBlock
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else RepConv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
 class ResConvBlock(nn.Module):
     # ResConvBlock
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else Conv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(Conv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
 class ResRepConvBlock(nn.Module):
     # ResConvBlock
+    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = int(in_channels * ratio)
+        self.cv1 = (
+            Conv(in_channels, in_channels, 3, 1, act=act)
+            if repeat == 1
+            else RepConv(in_channels, h_channels, 3, 1, act=act)
+        )
+        self.cb = (
+            nn.Sequential(*(RepConv(in_channels, in_channels, 3, 1, act=act) for _ in range(repeat - 2)))
+            if repeat > 2
+            else nn.Identity()
+        )
         self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)
     def forward(self, x):
 # Darknet
 class Dark(nn.Module):
     # DarkNet bottleneck
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):
         super().__init__()
 class RepDark(nn.Module):
     # RepDarkNet bottleneck
+    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):
         super().__init__()
 # CSPNet
 class CSP(nn.Module):
     # CSPNet
+    def __init__(self, in_channels, out_channels, repeat=1, cb_repeat=2, act=nn.ReLU(), ratio=1.0):
         super().__init__()
 class CSPDark(nn.Module):
     # CSPNet
+    def __init__(self, in_channels, out_channels, repeat=1, groups=1, act=nn.ReLU(), ratio=1.0):
         super().__init__()
         h_channels = in_channels // 2
         self.cv1 = Conv(in_channels, in_channels, 1, 1, act=act)
+        self.cb = nn.Sequential(
+            *(Dark(h_channels, h_channels, groups=groups, act=act, ratio=ratio) for _ in range(repeat))
+        )
         self.cv2 = Conv(2 * h_channels, out_channels, 1, 1, act=act)
     def forward(self, x):
 # ELAN
 class ELAN(nn.Module):
     # ELAN
+    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):
         super().__init__()
         h_channels = med_channels // 2
         self.cv1 = Conv(in_channels, med_channels, 1, 1)
         self.cb = nn.ModuleList(ConvBlock(h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
+        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)
     def forward(self, x):
 class CSPELAN(nn.Module):
     # ELAN
+    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):
         super().__init__()
         h_channels = med_channels // 2
         self.cv1 = Conv(in_channels, med_channels, 1, 1)
         self.cb = nn.ModuleList(CSP(h_channels, h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
+        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)
     def forward(self, x):
         y.extend((m(y[-1])) for m in self.cb)
         return self.cv2(torch.cat(y, 1))
+class Concat(nn.Module):
+    def __init__(self, dim=1):
+        super(Concat, self).__init__()
+        self.dim = dim
+    def forward(self, x):
+        return torch.cat(x, self.dim)
+class MaxPool(nn.Module):
+    def __init__(self, kernel_size: int = 2):
+        super().__init__()
+        self.pool_layer = nn.MaxPool2d(kernel_size=kernel_size, stride=kernel_size)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.pool_layer(x)
+# TODO: check if Mit
+class SPPCSPConv(nn.Module):
+    # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks
+    def __init__(self, in_channels, out_channels, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
+        super(SPPCSPConv, self).__init__()
+        c_ = int(2 * out_channels * e)  # hidden channels
+        self.cv1 = Conv(in_channels, c_, 1, 1)
+        self.cv2 = Conv(in_channels, c_, 1, 1)
+        self.cv3 = Conv(c_, c_, 3, 1)
+        self.cv4 = Conv(c_, c_, 1, 1)
+        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
+        self.cv5 = Conv(4 * c_, c_, 1, 1)
+        self.cv6 = Conv(c_, c_, 3, 1)
+        self.cv7 = Conv(2 * c_, out_channels, 1, 1)
+    def forward(self, x):
+        x1 = self.cv4(self.cv3(self.cv1(x)))
+        y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1)))
+        y2 = self.cv2(x)
+        return self.cv7(torch.cat((y1, y2), dim=1))
+class ImplicitA(nn.Module):
+    """
+    Implement YOLOR - implicit knowledge(Add), paper: https://arxiv.org/abs/2105.04206
+    """
+    def __init__(self, channel: int, mean: float = 0.0, std: float = 0.02):
+        super().__init__()
+        self.channel = channel
+        self.mean = mean
+        self.std = std
+        self.implicit = nn.Parameter(torch.empty(1, channel, 1, 1))
+        nn.init.normal_(self.implicit, mean=mean, std=self.std)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.implicit + x
+class ImplicitM(nn.Module):
+    """
+    Implement YOLOR - implicit knowledge(multiply), paper: https://arxiv.org/abs/2105.04206
+    """
+    def __init__(self, channel: int, mean: float = 1.0, std: float = 0.02):
+        super().__init__()
+        self.channel = channel
+        self.mean = mean
+        self.std = std
+        self.implicit = nn.Parameter(torch.empty(1, channel, 1, 1))
+        nn.init.normal_(self.implicit, mean=self.mean, std=self.std)
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.implicit * x
+class UpSample(nn.Module):
+    def __init__(self, **kwargs):
+        super().__init__()
+        self.UpSample = nn.Upsample(**kwargs)
+    def forward(self, x):
+        return self.UpSample(x)
+class IDetect(nn.Module):
+    """
+    #TODO: Add Detect class, change IDetect base class
+    """
+    stride = None  # strides computed during build
+    export = False  # onnx export
+    end2end = False
+    include_nms = False
+    concat = False
+    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
+        super(IDetect, self).__init__()
+        self.nc = nc  # number of classes
+        self.no = nc + 5  # number of outputs per anchor
+        self.nl = len(anchors)  # number of detection layers
+        self.na = len(anchors[0]) // 2  # number of anchors
+        self.grid = [torch.zeros(1)] * self.nl  # init grid
+        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
+        self.register_buffer("anchors", a)  # shape(nl,na,2)
+        self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+        self.ia = nn.ModuleList(ImplicitA(x) for x in ch)
+        self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch)
+    def forward(self, x):
+        # x = x.copy()  # for profiling
+        z = []  # inference output
+        self.training |= self.export
+        for i in range(self.nl):
+            x[i] = self.m[i](self.ia[i](x[i]))  # conv
+            x[i] = self.im[i](x[i])
+            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
+            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+            if not self.training:  # inference
+                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
+                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
+                y = x[i].sigmoid()
+                y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
+                z.append(y.view(bs, -1, self.no))
+        return x if self.training else (torch.cat(z, 1), x)

model/yolo.py CHANGED Viewed

@@ -1,15 +1,31 @@
 import torch.nn as nn
 from loguru import logger
-from typing import Dict, Any
 class YOLO(nn.Module):
     """
     A preliminary YOLO (You Only Look Once) model class still under development.
-    This class is intended to define a YOLO model for object detection tasks. It is
-    currently not implemented and serves as a placeholder for future development.
     Parameters:
         model_cfg: Configuration for the YOLO model. Expected to define the layers,
                    parameters, and any other relevant configuration details.
@@ -17,9 +33,70 @@ class YOLO(nn.Module):
     def __init__(self, model_cfg: Dict[str, Any]):
         super(YOLO, self).__init__()
-        # Placeholder for initialization logic
-        print(model_cfg)
-        raise NotImplementedError("Constructor not implemented.")
 def get_model(model_cfg: dict) -> YOLO:
@@ -32,4 +109,11 @@ def get_model(model_cfg: dict) -> YOLO:
         YOLO: An instance of the model defined by the given configuration.
     """
     model = YOLO(model_cfg)
     return model

+import inspect
+from typing import Any, Dict, List, Union
+import torch
 import torch.nn as nn
 from loguru import logger
+from model import module
+from utils.tools import load_model_cfg
+def get_layer_map():
+    """
+    Dynamically generates a dictionary mapping class names to classes,
+    filtering to include only those that are subclasses of nn.Module,
+    ensuring they are relevant neural network layers.
+    """
+    layer_map = {}
+    for name, obj in inspect.getmembers(module, inspect.isclass):
+        if issubclass(obj, nn.Module) and obj is not nn.Module:
+            layer_map[name] = obj
+    return layer_map
 class YOLO(nn.Module):
     """
     A preliminary YOLO (You Only Look Once) model class still under development.
     Parameters:
         model_cfg: Configuration for the YOLO model. Expected to define the layers,
                    parameters, and any other relevant configuration details.
     def __init__(self, model_cfg: Dict[str, Any]):
+        # Reads the class count from model_cfg["nc"] and the layer specification from
+        # model_cfg["model"], then builds every layer into self.model.
         super(YOLO, self).__init__()
+        self.nc = model_cfg["nc"]
+        self.layer_map = get_layer_map()  # Get the map Dict[str: Module]
+        # build_model looks layer types up in self.layer_map, so it must run last.
+        self.build_model(model_cfg["model"])
+    def build_model(self, model_arch: Dict[str, List[Dict[str, Dict[str, Dict]]]]):
+        model_list = nn.ModuleList()
+        output_dim = [3]
+        layer_indices_by_tag = {}
+        for arch_name, arch in model_arch.items():
+            logger.info(f"🏗️  Building model-{arch_name}")
+            for layer_idx, layer_spec in enumerate(arch, start=1):
+                layer_type, layer_info = next(iter(layer_spec.items()))
+                layer_args = layer_info.get("args", {})
+                source = layer_info.get("source", -1)
+                if isinstance(source, str):
+                    source = layer_indices_by_tag[source]
+                if "Conv" in layer_type:
+                    layer_args["in_channels"] = output_dim[source]
+                if "Detect" in layer_type:
+                    layer_args["nc"] = self.nc
+                    layer_args["ch"] = [output_dim[idx] for idx in source]
+                layer = self.create_layer(layer_type, source, **layer_args)
+                model_list.append(layer)
+                if "tags" in layer_info:
+                    if layer_info["tags"] in layer_indices_by_tag:
+                        raise ValueError(f"Duplicate tag '{layer_info['tags']}' found.")
+                    layer_indices_by_tag[layer_info["tags"]] = layer_idx
+                out_channels = self.get_out_channels(layer_type, layer_args, output_dim, source)
+                output_dim.append(out_channels)
+        self.model = model_list
+    def forward(self, x):
+        y = [x]
+        for layer in self.model:
+            if isinstance(layer.source, list):
+                model_input = [y[idx] for idx in layer.source]
+            else:
+                model_input = y[layer.source]
+            x = layer(model_input)
+            y.append(x)
+        return x
+    def get_out_channels(self, layer_type: str, layer_args: dict, output_dim: list, source: Union[int, list]):
+        if "Conv" in layer_type:
+            return layer_args["out_channels"]
+        if layer_type in ["MaxPool", "UpSample"]:
+            return output_dim[source]
+        if layer_type == "Concat":
+            return sum(output_dim[idx] for idx in source)
+        if layer_type == "IDetect":
+            return None
+    def create_layer(self, layer_type: str, source: Union[int, list], **kwargs):
+        if layer_type in self.layer_map:
+            layer = self.layer_map[layer_type](**kwargs)
+            layer.source = source
+            return layer
+        else:
+            raise ValueError(f"Unsupported layer type: {layer_type}")
 def get_model(model_cfg: dict) -> YOLO:
         YOLO: An instance of the model defined by the given configuration.
     """
     model = YOLO(model_cfg)
+    logger.info("✅ Success load model")
     return model
+if __name__ == "__main__":
+    # When the file is run directly, load the "v7-base" config and build the model from it.
+    model_cfg = load_model_cfg("v7-base")
+    YOLO(model_cfg)

train.py CHANGED Viewed

@@ -4,11 +4,14 @@ from model.yolo import get_model
 from utils.tools import load_model_cfg, custom_logger
 import hydra
 from config.config import Config
 @hydra.main(config_path="config", config_name="config", version_base=None)
 def main(cfg: Config):
     model = get_model(cfg.model)
 if __name__ == "__main__":

 from utils.tools import load_model_cfg, custom_logger
 import hydra
 from config.config import Config
+from omegaconf import OmegaConf
 @hydra.main(config_path="config", config_name="config", version_base=None)
 def main(cfg: Config):
+    # Entry point: Hydra loads "config/config" and passes it in as ``cfg``.
+    # Struct mode is switched off before the model is built
+    # (presumably so keys outside the declared schema can be used -- TODO confirm).
+    OmegaConf.set_struct(cfg, False)
     model = get_model(cfg.model)
+    logger.info("Success load model")
 if __name__ == "__main__":