Spaces:
Running
Running
File size: 4,567 Bytes
9bf4bd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Union
import torch
import torch.nn.functional as F
from mmcv.cnn import ConvModule
from mmengine.model import BaseModule, ModuleList
from torch import Tensor
from mmocr.registry import MODELS
@MODELS.register_module()
class FPNF(BaseModule):
    """FPN-like fusion module in Shape Robust Text Detection with Progressive
    Scale Expansion Network.

    Lateral 1x1 convs project each backbone stage to ``out_channels``; a
    top-down pathway upsamples and merges adjacent levels; finally all levels
    are resized to the bottom (highest-resolution) level and fused either by
    channel-wise concatenation or element-wise addition.

    Args:
        in_channels (list[int]): A list of number of input channels.
            Defaults to [256, 512, 1024, 2048].
        out_channels (int): The number of output channels.
            Defaults to 256.
        fusion_type (str): Type of the final feature fusion layer. Available
            options are "concat" and "add". Defaults to "concat".
        init_cfg (dict or list[dict], optional): Initialization configs.
            Defaults to
            dict(type='Xavier', layer='Conv2d', distribution='uniform')
    """

    def __init__(
        self,
        in_channels: List[int] = [256, 512, 1024, 2048],
        out_channels: int = 256,
        fusion_type: str = 'concat',
        init_cfg: Optional[Union[Dict, List[Dict]]] = dict(
            type='Xavier', layer='Conv2d', distribution='uniform')
    ) -> None:
        super().__init__(init_cfg=init_cfg)
        conv_cfg = None
        norm_cfg = dict(type='BN')
        act_cfg = dict(type='ReLU')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.lateral_convs = ModuleList()
        self.fpn_convs = ModuleList()
        self.backbone_end_level = len(in_channels)
        for i in range(self.backbone_end_level):
            # 1x1 lateral conv: project stage i to out_channels.
            l_conv = ConvModule(
                in_channels[i],
                out_channels,
                1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                inplace=False)
            self.lateral_convs.append(l_conv)

            if i < self.backbone_end_level - 1:
                # 3x3 smoothing conv applied after each top-down merge;
                # the topmost level is never merged into, so it needs none.
                fpn_conv = ConvModule(
                    out_channels,
                    out_channels,
                    3,
                    padding=1,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg,
                    inplace=False)
                self.fpn_convs.append(fpn_conv)

        self.fusion_type = fusion_type
        if self.fusion_type == 'concat':
            # All pyramid levels are concatenated along the channel dim, so
            # the fused map has len(in_channels) * out_channels channels.
            # (Previously hard-coded to 1024, which only matched the default
            # 4 levels x 256 channels and broke any other configuration.)
            feature_channels = len(in_channels) * out_channels
        elif self.fusion_type == 'add':
            # Element-wise sum keeps the channel count at out_channels.
            # (Previously hard-coded to 256.)
            feature_channels = out_channels
        else:
            raise NotImplementedError(
                f'fusion_type must be "concat" or "add", '
                f'but got {fusion_type}')
        self.output_convs = ConvModule(
            feature_channels,
            out_channels,
            3,
            padding=1,
            conv_cfg=None,
            norm_cfg=norm_cfg,
            act_cfg=act_cfg,
            inplace=False)

    def forward(self, inputs: List[Tensor]) -> Tensor:
        """
        Args:
            inputs (list[Tensor]): Each tensor has the shape of
                :math:`(N, C_i, H_i, W_i)`. It usually expects 4 tensors
                (C2-C5 features) from ResNet.

        Returns:
            Tensor: A tensor of shape :math:`(N, C_{out}, H_0, W_0)` where
            :math:`C_{out}` is ``out_channels``.
        """
        assert len(inputs) == len(self.in_channels)

        # build laterals
        laterals = [
            lateral_conv(inputs[i])
            for i, lateral_conv in enumerate(self.lateral_convs)
        ]

        # build top-down path: merge each level into the next finer one.
        used_backbone_levels = len(laterals)
        for i in range(used_backbone_levels - 1, 0, -1):
            # step 1: upsample to level i-1 size and add level i-1
            prev_shape = laterals[i - 1].shape[2:]
            laterals[i - 1] = laterals[i - 1] + F.interpolate(
                laterals[i], size=prev_shape, mode='nearest')
            # step 2: smooth level i-1
            laterals[i - 1] = self.fpn_convs[i - 1](laterals[i - 1])

        # upsample every level to the bottom (finest) resolution and fuse.
        bottom_shape = laterals[0].shape[2:]
        for i in range(1, used_backbone_levels):
            laterals[i] = F.interpolate(
                laterals[i], size=bottom_shape, mode='nearest')

        if self.fusion_type == 'concat':
            out = torch.cat(laterals, 1)
        elif self.fusion_type == 'add':
            out = laterals[0]
            for i in range(1, used_backbone_levels):
                # Out-of-place add: `out += ...` would mutate laterals[0]
                # in place, which aliases an autograd-tracked conv output.
                out = out + laterals[i]
        else:
            raise NotImplementedError(
                f'fusion_type must be "concat" or "add", '
                f'but got {self.fusion_type}')
        out = self.output_convs(out)
        return out
|