File size: 7,704 Bytes
3094730
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List

import torch.nn as nn
from mmcv.cnn import ConvModule
from mmdet.utils import ConfigType, OptMultiConfig

from mmyolo.models.backbones.csp_resnet import CSPResLayer
from mmyolo.models.necks import BaseYOLONeck
from mmyolo.registry import MODELS


@MODELS.register_module()
class PPYOLOECSPPAFPN(BaseYOLONeck):
    """CSPPAN in PPYOLOE.

    Args:
        in_channels (List[int]): Number of input channels per scale.
        out_channels (List[int]): Number of output channels
            (used at each scale).
        deepen_factor (float): Depth multiplier, multiply number of
            blocks in CSP layer by this amount. Defaults to 1.0.
        widen_factor (float): Width multiplier, multiply number of
            channels in each layer by this amount. Defaults to 1.0.
        freeze_all(bool): Whether to freeze the model.
        num_csplayer (int): Number of `CSPResLayer` in per layer.
            Defaults to 1.
        num_blocks_per_layer (int): Number of blocks per `CSPResLayer`.
            Defaults to 3.
        block_cfg (dict): Config dict for block. Defaults to
            dict(type='PPYOLOEBasicBlock', shortcut=True, use_alpha=False)
        norm_cfg (dict): Config dict for normalization layer.
            Defaults to dict(type='BN', momentum=0.1, eps=1e-5).
        act_cfg (dict): Config dict for activation layer.
            Defaults to dict(type='SiLU', inplace=True).
        drop_block_cfg (dict, optional): Drop block config.
            Defaults to None. If you want to use Drop block after
            `CSPResLayer`, you can set this para as
            dict(type='mmdet.DropBlock', drop_prob=0.1,
            block_size=3, warm_iters=0).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Defaults to None.
        use_spp (bool): Whether to use `SPP` in reduce layer.
            Defaults to False.
    """

    def __init__(self,
                 in_channels: List[int] = [256, 512, 1024],
                 out_channels: List[int] = [256, 512, 1024],
                 deepen_factor: float = 1.0,
                 widen_factor: float = 1.0,
                 freeze_all: bool = False,
                 num_csplayer: int = 1,
                 num_blocks_per_layer: int = 3,
                 block_cfg: ConfigType = dict(
                     type='PPYOLOEBasicBlock', shortcut=False,
                     use_alpha=False),
                 norm_cfg: ConfigType = dict(
                     type='BN', momentum=0.1, eps=1e-5),
                 act_cfg: ConfigType = dict(type='SiLU', inplace=True),
                 drop_block_cfg: ConfigType = None,
                 init_cfg: OptMultiConfig = None,
                 use_spp: bool = False):
        self.block_cfg = block_cfg
        self.num_csplayer = num_csplayer
        self.num_blocks_per_layer = round(num_blocks_per_layer * deepen_factor)
        # Only use spp in last reduce_layer, if use_spp=True.
        self.use_spp = use_spp
        self.drop_block_cfg = drop_block_cfg
        assert drop_block_cfg is None or isinstance(drop_block_cfg, dict)

        super().__init__(
            in_channels=[
                int(channel * widen_factor) for channel in in_channels
            ],
            out_channels=[
                int(channel * widen_factor) for channel in out_channels
            ],
            deepen_factor=deepen_factor,
            widen_factor=widen_factor,
            freeze_all=freeze_all,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            init_cfg=init_cfg)

    def build_reduce_layer(self, idx: int):
        """build reduce layer.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The reduce layer.
        """
        if idx == len(self.in_channels) - 1:
            # fpn_stage
            in_channels = self.in_channels[idx]
            out_channels = self.out_channels[idx]

            layer = [
                CSPResLayer(
                    in_channels=in_channels if i == 0 else out_channels,
                    out_channels=out_channels,
                    num_block=self.num_blocks_per_layer,
                    block_cfg=self.block_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=self.act_cfg,
                    attention_cfg=None,
                    use_spp=self.use_spp) for i in range(self.num_csplayer)
            ]

            if self.drop_block_cfg:
                layer.append(MODELS.build(self.drop_block_cfg))
            layer = nn.Sequential(*layer)
        else:
            layer = nn.Identity()

        return layer

    def build_upsample_layer(self, idx: int) -> nn.Module:
        """build upsample layer."""
        # fpn_route
        in_channels = self.out_channels[idx]
        return nn.Sequential(
            ConvModule(
                in_channels=in_channels,
                out_channels=in_channels // 2,
                kernel_size=1,
                stride=1,
                padding=0,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg),
            nn.Upsample(scale_factor=2, mode='nearest'))

    def build_top_down_layer(self, idx: int) -> nn.Module:
        """build top down layer.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The top down layer.
        """
        # fpn_stage
        in_channels = self.in_channels[idx - 1] + self.out_channels[idx] // 2
        out_channels = self.out_channels[idx - 1]

        layer = [
            CSPResLayer(
                in_channels=in_channels if i == 0 else out_channels,
                out_channels=out_channels,
                num_block=self.num_blocks_per_layer,
                block_cfg=self.block_cfg,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg,
                attention_cfg=None,
                use_spp=False) for i in range(self.num_csplayer)
        ]

        if self.drop_block_cfg:
            layer.append(MODELS.build(self.drop_block_cfg))

        return nn.Sequential(*layer)

    def build_downsample_layer(self, idx: int) -> nn.Module:
        """build downsample layer.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The downsample layer.
        """
        # pan_route
        return ConvModule(
            in_channels=self.out_channels[idx],
            out_channels=self.out_channels[idx],
            kernel_size=3,
            stride=2,
            padding=1,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def build_bottom_up_layer(self, idx: int) -> nn.Module:
        """build bottom up layer.

        Args:
            idx (int): layer idx.

        Returns:
            nn.Module: The bottom up layer.
        """
        # pan_stage
        in_channels = self.out_channels[idx + 1] + self.out_channels[idx]
        out_channels = self.out_channels[idx + 1]

        layer = [
            CSPResLayer(
                in_channels=in_channels if i == 0 else out_channels,
                out_channels=out_channels,
                num_block=self.num_blocks_per_layer,
                block_cfg=self.block_cfg,
                norm_cfg=self.norm_cfg,
                act_cfg=self.act_cfg,
                attention_cfg=None,
                use_spp=False) for i in range(self.num_csplayer)
        ]

        if self.drop_block_cfg:
            layer.append(MODELS.build(self.drop_block_cfg))

        return nn.Sequential(*layer)

    def build_out_layer(self, *args, **kwargs) -> nn.Module:
        """build out layer."""
        return nn.Identity()