import torch
import torch.nn as nn


# basic
class Conv(nn.Module):
    """Basic convolution block: ``Conv2d`` -> ``BatchNorm2d`` -> activation.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Kernel size, an int or a per-dimension sequence.
        stride: Convolution stride.
        padding: Explicit padding. Only used when ``auto_padding`` is False
            (``None`` is then treated as 0); with ``auto_padding=True`` it is
            overwritten by the computed value.
        dilation: Dilation, an int or a per-dimension sequence.
        groups: Number of convolution groups.
        act: Activation module. Any value that is not an ``nn.Module``
            (e.g. ``False`` or ``None``) selects ``nn.Identity``. Note that the
            default instance is created once and shared by every block that
            relies on the default.
        bias: Whether the convolution has a bias term.
        auto_padding: If True, padding is computed per dimension as
            ``(dilation * (kernel_size - 1) + 1) // 2``.
        padding_mode: Padding mode forwarded to ``nn.Conv2d``.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size,
        stride=1,
        padding=None,
        dilation=1,
        groups=1,
        act=nn.SiLU(),
        bias=False,
        auto_padding=True,
        padding_mode="zeros",
    ):

        super().__init__()

        if auto_padding:
            padding = self._auto_pad(kernel_size, dilation)
        elif padding is None:
            # nn.Conv2d does not accept None; fall back to "no padding".
            padding = 0

        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size,
            stride,
            padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
            padding_mode=padding_mode,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = act if isinstance(act, nn.Module) else nn.Identity()

    @staticmethod
    def _auto_pad(kernel_size, dilation):
        """Return the automatic padding for int or per-dimension kernel/dilation."""

        def pad(k, d):
            return (d * (k - 1) + 1) // 2

        kernel_is_int = isinstance(kernel_size, int)
        dilation_is_int = isinstance(dilation, int)
        if kernel_is_int and dilation_is_int:
            return pad(kernel_size, dilation)

        # Broadcast the scalar side so both are per-dimension (Conv2d => 2 dims).
        kernels = (kernel_size, kernel_size) if kernel_is_int else tuple(kernel_size)
        dilations = (dilation, dilation) if dilation_is_int else tuple(dilation)
        return tuple(pad(k, d) for k, d in zip(kernels, dilations))

    def forward(self, x):
        """Apply convolution, batch normalization and activation."""
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Apply convolution and activation only, skipping ``self.bn``."""
        return self.act(self.conv(x))

    # TODO: implement fuse_conv_bn(self)


# RepVGG
class RepConv(nn.Module):
    """RepVGG-style block: a k x k and a 1 x 1 Conv+BN branch summed, then activated.

    Reference: https://github.com/DingXiaoH/RepVGG

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Kernel size of the first branch (the second is always 1).
        padding: Accepted for interface compatibility but not used; both
            branches rely on ``Conv``'s automatic padding.
        stride: Stride shared by both branches.
        groups: Number of groups shared by both branches.
        act: Activation module; any non-``nn.Module`` value selects ``nn.Identity``.
        deploy: Stored on the instance; not consulted by this class.
    """

    def __init__(
        self, in_channels, out_channels, kernel_size=3, padding=None, stride=1, groups=1, act=nn.SiLU(), deploy=False
    ):

        super().__init__()
        self.deploy = deploy
        self.conv1 = Conv(in_channels, out_channels, kernel_size, stride, groups=groups, act=False)
        self.conv2 = Conv(in_channels, out_channels, 1, stride, groups=groups, act=False)
        self.act = act if isinstance(act, nn.Module) else nn.Identity()

    def forward(self, x):
        """Sum both Conv+BN branches and apply the activation."""
        return self.act(self.conv1(x) + self.conv2(x))

    def forward_fuse(self, x):
        """Run the single fused convolution followed by the activation.

        The fused convolution is built from the current branch weights and BN
        running statistics on first use if ``fuse_convs`` was not called yet.
        """
        if not hasattr(self, "conv"):
            self.fuse_convs()
        return self.act(self.conv(x))

    def fuse_convs(self):
        """Merge both Conv+BN branches into one convolution stored as ``self.conv``.

        Each branch is folded with its batch norm, then the second branch's
        kernel is zero-padded to the first branch's kernel size and the weights
        and biases are added. ``conv1``/``conv2`` are left untouched.

        Returns:
            ``self``.

        Raises:
            ValueError: If the kernel-size difference between the branches is
                odd, so the smaller kernel cannot be centered.
        """
        fused = self.fuse_conv_bn(self.conv1.conv, self.conv1.bn)
        point = self.fuse_conv_bn(self.conv2.conv, self.conv2.bn)

        diff_h = fused.kernel_size[0] - point.kernel_size[0]
        diff_w = fused.kernel_size[1] - point.kernel_size[1]
        if diff_h < 0 or diff_w < 0 or diff_h % 2 or diff_w % 2:
            raise ValueError(
                f"cannot center kernel {point.kernel_size} inside kernel {fused.kernel_size}"
            )
        pad_h, pad_w = diff_h // 2, diff_w // 2

        with torch.no_grad():
            # F.pad order for the last two dims: (left, right, top, bottom).
            fused.weight.add_(nn.functional.pad(point.weight, [pad_w, pad_w, pad_h, pad_h]))
            fused.bias.add_(point.bias)

        self.conv = fused
        return self

    def fuse_conv_bn(self, conv, bn):
        """Fold ``bn`` (using its running statistics) into a copy of ``conv``.

        Args:
            conv: An ``nn.Conv2d``; its own bias, if any, is folded in as well.
            bn: An ``nn.BatchNorm2d`` applied after ``conv``.

        Returns:
            A new ``nn.Conv2d`` with bias whose output equals ``bn(conv(x))``
            when ``bn`` is in eval mode.
        """
        with torch.no_grad():
            std = (bn.running_var + bn.eps).sqrt()
            # Non-affine batch norm has no weight/bias: treat as scale 1, shift 0.
            gamma = bn.weight if bn.weight is not None else torch.ones_like(std)
            beta = bn.bias if bn.bias is not None else torch.zeros_like(std)
            conv_bias = conv.bias if conv.bias is not None else torch.zeros_like(std)

            scale = gamma / std
            weights = conv.weight * scale.reshape(-1, 1, 1, 1)
            bias = beta + (conv_bias - bn.running_mean) * scale

        fused = nn.Conv2d(
            in_channels=conv.in_channels,
            out_channels=conv.out_channels,
            kernel_size=conv.kernel_size,
            stride=conv.stride,
            padding=conv.padding,
            dilation=conv.dilation,
            groups=conv.groups,
            bias=True,
            padding_mode=conv.padding_mode,
        )

        fused.weight = torch.nn.Parameter(weights)
        fused.bias = torch.nn.Parameter(bias)
        return fused


# ResNet
class Res(nn.Module):
    """ResNet-style bottleneck: 1x1 conv, 3x3 (optionally grouped) conv, 1x1 conv, plus skip.

    The hidden width is ``int(in_channels * ratio)``. The skip connection is a
    plain addition of the input, so the output of the last conv must be
    addable to the input.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):

        super().__init__()

        hidden = int(in_channels * ratio)
        self.cv1 = Conv(in_channels, hidden, 1, 1, act=act)
        self.cv2 = Conv(hidden, hidden, 3, 1, groups=groups, act=act)
        self.cv3 = Conv(hidden, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the three-conv branch applied to ``x``."""
        branch = self.cv1(x)
        branch = self.cv2(branch)
        branch = self.cv3(branch)
        return x + branch


class RepRes(nn.Module):
    """ResNet-style bottleneck whose middle 3x3 conv is a ``RepConv``.

    The hidden width is ``int(in_channels * ratio)``. The skip connection is a
    plain addition of the input, so the output of the last conv must be
    addable to the input.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.25):

        super().__init__()

        h_channels = int(in_channels * ratio)
        self.cv1 = Conv(in_channels, h_channels, 1, 1, act=act)
        # RepConv's 4th positional parameter is `padding`, not `stride` (unlike
        # Conv), so kernel size and stride are passed by keyword.
        self.cv2 = RepConv(h_channels, h_channels, kernel_size=3, stride=1, groups=groups, act=act)
        self.cv3 = Conv(h_channels, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the three-conv branch applied to ``x``."""
        return x + self.cv3(self.cv2(self.cv1(x)))


class ConvBlock(nn.Module):
    """Stack of 3x3 ``Conv`` layers that maps ``in_channels`` back to ``in_channels``.

    Layout by ``repeat``:
        * ``repeat == 1``: a single ``in_channels -> in_channels`` conv.
        * otherwise: ``cv1`` (``in -> hidden``), ``repeat - 2`` middle convs
          (``hidden -> hidden``, only when ``repeat > 2``), then ``cv2``
          (``hidden -> in``), where ``hidden = int(in_channels * ratio)``.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):

        super().__init__()

        h_channels = int(in_channels * ratio)
        if repeat == 1:
            self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act)
        else:
            self.cv1 = Conv(in_channels, h_channels, 3, 1, act=act)

        if repeat > 2:
            # The middle convs sit between cv1 (-> hidden) and cv2 (hidden ->),
            # so they must operate on the hidden width, not in_channels.
            self.cb = nn.Sequential(*(Conv(h_channels, h_channels, 3, 1, act=act) for _ in range(repeat - 2)))
        else:
            self.cb = nn.Identity()

        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Apply ``cv1``, the middle convs and ``cv2`` in sequence."""
        return self.cv2(self.cb(self.cv1(x)))


class RepConvBlock(nn.Module):
    """Stack of 3x3 layers like ``ConvBlock`` but using ``RepConv`` for the leading layers.

    Layout by ``repeat``:
        * ``repeat == 1``: a single ``in_channels -> in_channels`` ``Conv``.
        * otherwise: ``cv1`` is a ``RepConv`` (``in -> hidden``), followed by
          ``repeat - 2`` ``RepConv`` layers (``hidden -> hidden``, only when
          ``repeat > 2``) and a final ``Conv`` ``cv2`` (``hidden -> in``),
          where ``hidden = int(in_channels * ratio)``.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):

        super().__init__()

        h_channels = int(in_channels * ratio)
        # RepConv's 4th positional parameter is `padding`, not `stride` (unlike
        # Conv), so kernel size and stride are passed by keyword to RepConv.
        if repeat == 1:
            self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act)
        else:
            self.cv1 = RepConv(in_channels, h_channels, kernel_size=3, stride=1, act=act)

        if repeat > 2:
            # The middle layers sit between cv1 (-> hidden) and cv2 (hidden ->),
            # so they must operate on the hidden width, not in_channels.
            self.cb = nn.Sequential(
                *(RepConv(h_channels, h_channels, kernel_size=3, stride=1, act=act) for _ in range(repeat - 2))
            )
        else:
            self.cb = nn.Identity()

        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Apply ``cv1``, the middle layers and ``cv2`` in sequence."""
        return self.cv2(self.cb(self.cv1(x)))


class ResConvBlock(nn.Module):
    """Stack of 3x3 ``Conv`` layers with a residual connection around the whole stack.

    Layout by ``repeat``:
        * ``repeat == 1``: a single ``in_channels -> in_channels`` conv.
        * otherwise: ``cv1`` (``in -> hidden``), ``repeat - 2`` middle convs
          (``hidden -> hidden``, only when ``repeat > 2``), then ``cv2``
          (``hidden -> in``), where ``hidden = int(in_channels * ratio)``.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):

        super().__init__()

        h_channels = int(in_channels * ratio)
        if repeat == 1:
            self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act)
        else:
            self.cv1 = Conv(in_channels, h_channels, 3, 1, act=act)

        if repeat > 2:
            # The middle convs sit between cv1 (-> hidden) and cv2 (hidden ->),
            # so they must operate on the hidden width, not in_channels.
            self.cb = nn.Sequential(*(Conv(h_channels, h_channels, 3, 1, act=act) for _ in range(repeat - 2)))
        else:
            self.cb = nn.Identity()

        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the conv stack applied to ``x``."""
        return x + self.cv2(self.cb(self.cv1(x)))


class ResRepConvBlock(nn.Module):
    """Residual stack of 3x3 layers using ``RepConv`` for the leading layers.

    Layout by ``repeat``:
        * ``repeat == 1``: a single ``in_channels -> in_channels`` ``Conv``.
        * otherwise: ``cv1`` is a ``RepConv`` (``in -> hidden``), followed by
          ``repeat - 2`` ``RepConv`` layers (``hidden -> hidden``, only when
          ``repeat > 2``) and a final ``Conv`` ``cv2`` (``hidden -> in``),
          where ``hidden = int(in_channels * ratio)``.
    The input is added to the output of the stack.
    """

    def __init__(self, in_channels, repeat=1, act=nn.ReLU(), ratio=1.0):

        super().__init__()

        h_channels = int(in_channels * ratio)
        # RepConv's 4th positional parameter is `padding`, not `stride` (unlike
        # Conv), so kernel size and stride are passed by keyword to RepConv.
        if repeat == 1:
            self.cv1 = Conv(in_channels, in_channels, 3, 1, act=act)
        else:
            self.cv1 = RepConv(in_channels, h_channels, kernel_size=3, stride=1, act=act)

        if repeat > 2:
            # The middle layers sit between cv1 (-> hidden) and cv2 (hidden ->),
            # so they must operate on the hidden width, not in_channels.
            self.cb = nn.Sequential(
                *(RepConv(h_channels, h_channels, kernel_size=3, stride=1, act=act) for _ in range(repeat - 2))
            )
        else:
            self.cb = nn.Identity()

        self.cv2 = nn.Identity() if repeat == 1 else Conv(h_channels, in_channels, 3, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the layer stack applied to ``x``."""
        return x + self.cv2(self.cb(self.cv1(x)))


# Darknet
class Dark(nn.Module):
    """Darknet-style bottleneck: 1x1 conv to a hidden width, 3x3 (optionally grouped) conv, plus skip.

    The hidden width is ``int(in_channels * ratio)``. The input is added to the
    branch output, so both must be addable.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):

        super().__init__()

        hidden = int(in_channels * ratio)
        self.cv1 = Conv(in_channels, hidden, 1, 1, act=act)
        self.cv2 = Conv(hidden, out_channels, 3, 1, groups=groups, act=act)

    def forward(self, x):
        """Return ``x`` plus the two-conv branch applied to ``x``."""
        branch = self.cv1(x)
        branch = self.cv2(branch)
        return x + branch


class RepDark(nn.Module):
    """Darknet-style bottleneck with a leading 3x3 ``RepConv`` and a trailing 1x1 ``Conv``, plus skip.

    The hidden width is ``int(in_channels * ratio)``. The input is added to the
    branch output, so both must be addable.
    """

    def __init__(self, in_channels, out_channels, groups=1, act=nn.ReLU(), ratio=0.5):

        super().__init__()

        h_channels = int(in_channels * ratio)
        # RepConv's 4th positional parameter is `padding`, not `stride` (unlike
        # Conv), so kernel size and stride are passed by keyword.
        self.cv1 = RepConv(in_channels, h_channels, kernel_size=3, stride=1, groups=groups, act=act)
        self.cv2 = Conv(h_channels, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Return ``x`` plus the two-layer branch applied to ``x``."""
        return x + self.cv2(self.cv1(x))


# CSPNet
class CSP(nn.Module):
    """Cross-stage-partial block built from ``ResConvBlock`` layers.

    ``cv1`` projects the input, the result is split into two halves along the
    channel dimension, the first half goes through ``repeat`` stacked
    ``ResConvBlock`` modules, and both halves are concatenated and fused by
    ``cv2``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        repeat: Number of stacked ``ResConvBlock`` modules.
        cb_repeat: ``repeat`` argument forwarded to each ``ResConvBlock``.
        act: Activation used by every conv in the block.
        ratio: Accepted for interface compatibility; not used by this block.
    """

    def __init__(self, in_channels, out_channels, repeat=1, cb_repeat=2, act=nn.ReLU(), ratio=1.0):

        super().__init__()

        h_channels = in_channels // 2
        # Project to exactly 2 * h channels so the split always yields two
        # h-channel halves, even when in_channels is odd.
        self.cv1 = Conv(in_channels, 2 * h_channels, 1, 1, act=act)
        self.cb = nn.Sequential(*(ResConvBlock(h_channels, act=act, repeat=cb_repeat) for _ in range(repeat)))
        self.cv2 = Conv(2 * h_channels, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Split the projected input, process the first half, and fuse both halves."""
        processed, bypass = self.cv1(x).chunk(2, 1)

        return self.cv2(torch.cat((self.cb(processed), bypass), 1))


class CSPDark(nn.Module):
    """Cross-stage-partial block built from ``Dark`` bottlenecks.

    ``cv1`` projects the input, the result is split into two halves along the
    channel dimension, the first half goes through ``repeat`` stacked ``Dark``
    bottlenecks, and both halves are concatenated and fused by ``cv2``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        repeat: Number of stacked ``Dark`` bottlenecks.
        groups: ``groups`` argument forwarded to each ``Dark`` bottleneck.
        act: Activation used by every conv in the block.
        ratio: ``ratio`` argument forwarded to each ``Dark`` bottleneck.
    """

    def __init__(self, in_channels, out_channels, repeat=1, groups=1, act=nn.ReLU(), ratio=1.0):

        super().__init__()

        h_channels = in_channels // 2
        # Project to exactly 2 * h channels so the split always yields two
        # h-channel halves, even when in_channels is odd.
        self.cv1 = Conv(in_channels, 2 * h_channels, 1, 1, act=act)
        self.cb = nn.Sequential(
            *(Dark(h_channels, h_channels, groups=groups, act=act, ratio=ratio) for _ in range(repeat))
        )
        self.cv2 = Conv(2 * h_channels, out_channels, 1, 1, act=act)

    def forward(self, x):
        """Split the projected input, process the first half, and fuse both halves."""
        processed, bypass = self.cv1(x).chunk(2, 1)

        return self.cv2(torch.cat((self.cb(processed), bypass), 1))


# ELAN
class ELAN(nn.Module):
    """ELAN block: split, chain ``ConvBlock`` modules, and concatenate every intermediate.

    ``cv1`` projects the input, which is split into two halves along the
    channel dimension. Each ``ConvBlock`` consumes the most recently produced
    feature map (starting with the second half), and all ``2 + elan_repeat``
    feature maps are concatenated and fused by ``cv2``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        med_channels: Width of the projection; each half has ``med_channels // 2`` channels.
        elan_repeat: Number of chained ``ConvBlock`` modules.
        cb_repeat: ``repeat`` argument forwarded to each ``ConvBlock``.
        ratio: ``ratio`` argument forwarded to each ``ConvBlock``.
    """

    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):

        super().__init__()

        h_channels = med_channels // 2
        # Project to exactly 2 * h channels so the split always yields two
        # h-channel halves, even when med_channels is odd.
        self.cv1 = Conv(in_channels, 2 * h_channels, 1, 1)
        self.cb = nn.ModuleList(ConvBlock(h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)

    def forward(self, x):
        """Return the fused concatenation of both halves and every chained block output."""
        features = list(self.cv1(x).chunk(2, 1))
        # Explicit loop: each block must see the output of the previous one.
        for block in self.cb:
            features.append(block(features[-1]))

        return self.cv2(torch.cat(features, 1))


class CSPELAN(nn.Module):
    """ELAN block whose chained modules are ``CSP`` blocks.

    ``cv1`` projects the input, which is split into two halves along the
    channel dimension. Each ``CSP`` block consumes the most recently produced
    feature map (starting with the second half), and all ``2 + elan_repeat``
    feature maps are concatenated and fused by ``cv2``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        med_channels: Width of the projection; each half has ``med_channels // 2`` channels.
        elan_repeat: Number of chained ``CSP`` blocks.
        cb_repeat: Forwarded to each ``CSP`` block as its ``repeat`` argument.
        ratio: ``ratio`` argument forwarded to each ``CSP`` block.
    """

    def __init__(self, in_channels, out_channels, med_channels, elan_repeat=2, cb_repeat=2, ratio=1.0):

        super().__init__()

        h_channels = med_channels // 2
        # Project to exactly 2 * h channels so the split always yields two
        # h-channel halves, even when med_channels is odd.
        self.cv1 = Conv(in_channels, 2 * h_channels, 1, 1)
        self.cb = nn.ModuleList(CSP(h_channels, h_channels, repeat=cb_repeat, ratio=ratio) for _ in range(elan_repeat))
        self.cv2 = Conv((2 + elan_repeat) * h_channels, out_channels, 1, 1)

    def forward(self, x):
        """Return the fused concatenation of both halves and every chained block output."""
        features = list(self.cv1(x).chunk(2, 1))
        # Explicit loop: each block must see the output of the previous one.
        for block in self.cb:
            features.append(block(features[-1]))

        return self.cv2(torch.cat(features, 1))


class Concat(nn.Module):
    """Concatenate a sequence of tensors along a fixed dimension (default 1)."""

    def __init__(self, dim=1):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        """Return ``torch.cat`` of the tensors in ``x`` along ``self.dim``."""
        axis = self.dim
        return torch.cat(x, axis)


class MaxPool(nn.Module):
    """2D max pooling whose stride equals its kernel size."""

    def __init__(self, kernel_size: int = 2):
        super().__init__()
        window = kernel_size
        self.pool_layer = nn.MaxPool2d(kernel_size=window, stride=window)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``x`` after max pooling."""
        pooled = self.pool_layer(x)
        return pooled


# TODO: check if Mit
class SPPCSPConv(nn.Module):
    """Spatial-pyramid-pooling block with a cross-stage-partial bypass.

    CSP reference: https://github.com/WongKinYiu/CrossStagePartialNetworks

    The main path runs three convs, concatenates the result with its
    max-pooled versions (one per kernel size in ``k``, stride 1), and applies
    two more convs. A parallel 1x1 conv of the input is concatenated with the
    main path and fused by the final 1x1 conv.

    The hidden width is ``int(2 * out_channels * e)``. The parameters ``n``,
    ``shortcut`` and ``g`` are accepted but not used.
    """

    def __init__(self, in_channels, out_channels, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)):
        super().__init__()
        hidden = int(2 * out_channels * e)  # hidden channels
        self.cv1 = Conv(in_channels, hidden, 1, 1)
        self.cv2 = Conv(in_channels, hidden, 1, 1)
        self.cv3 = Conv(hidden, hidden, 3, 1)
        self.cv4 = Conv(hidden, hidden, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)
        self.cv5 = Conv(4 * hidden, hidden, 1, 1)
        self.cv6 = Conv(hidden, hidden, 3, 1)
        self.cv7 = Conv(2 * hidden, out_channels, 1, 1)

    def forward(self, x):
        """Return the fused output of the pooled main path and the bypass path."""
        stem = self.cv4(self.cv3(self.cv1(x)))
        pyramid = [stem]
        for pool in self.m:
            pyramid.append(pool(stem))
        main = self.cv6(self.cv5(torch.cat(pyramid, 1)))
        bypass = self.cv2(x)
        return self.cv7(torch.cat((main, bypass), dim=1))


class ImplicitA(nn.Module):
    """
    YOLOR implicit knowledge (additive), paper: https://arxiv.org/abs/2105.04206

    Holds a learnable tensor of shape ``(1, channel, 1, 1)``, initialized from a
    normal distribution with the given ``mean`` and ``std``, that is added to
    the input.
    """

    def __init__(self, channel: int, mean: float = 0.0, std: float = 0.02):
        super().__init__()
        self.channel, self.mean, self.std = channel, mean, std

        shape = (1, channel, 1, 1)
        self.implicit = nn.Parameter(torch.empty(shape))
        nn.init.normal_(self.implicit, mean=self.mean, std=self.std)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the implicit tensor added to ``x``."""
        offset = self.implicit
        return offset + x


class ImplicitM(nn.Module):
    """
    YOLOR implicit knowledge (multiplicative), paper: https://arxiv.org/abs/2105.04206

    Holds a learnable tensor of shape ``(1, channel, 1, 1)``, initialized from a
    normal distribution with the given ``mean`` and ``std``, that multiplies
    the input.
    """

    def __init__(self, channel: int, mean: float = 1.0, std: float = 0.02):
        super().__init__()
        self.channel, self.mean, self.std = channel, mean, std

        shape = (1, channel, 1, 1)
        self.implicit = nn.Parameter(torch.empty(shape))
        nn.init.normal_(self.implicit, mean=self.mean, std=self.std)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the implicit tensor multiplied by ``x``."""
        scale = self.implicit
        return scale * x


class UpSample(nn.Module):
    """Thin wrapper around ``nn.Upsample``; all keyword arguments are forwarded to it."""

    def __init__(self, **kwargs):
        super().__init__()
        upsampler = nn.Upsample(**kwargs)
        self.UpSample = upsampler

    def forward(self, x):
        """Return ``x`` after upsampling."""
        resized = self.UpSample(x)
        return resized


class IDetect(nn.Module):
    """
    Detection head that wraps each 1x1 output conv with ``ImplicitA`` (before)
    and ``ImplicitM`` (after).

    #TODO: Add Detect class, change IDetect base class

    Args:
        nc: Number of classes; each anchor predicts ``nc + 5`` values.
        anchors: Per-layer anchor lists, each a flat sequence of (w, h) pairs.
            Must be non-empty, since ``anchors[0]`` is read to count anchors.
        ch: Input channel count of each detection layer.

    ``stride`` must be assigned (indexable per detection layer) before running
    in inference mode.
    """

    stride = None  # strides computed during build
    export = False  # onnx export
    end2end = False
    include_nms = False
    concat = False

    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
        super(IDetect, self).__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
        self.register_buffer("anchors", a)  # shape(nl,na,2)
        self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv

        self.ia = nn.ModuleList(ImplicitA(x) for x in ch)
        self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch)

    def forward(self, x):
        """Run the head on a list of per-layer feature maps.

        ``x`` is modified in place: each entry is replaced by its reshaped
        prediction of shape ``(bs, na, ny, nx, no)``.

        Returns:
            In training mode (or when ``export`` is set, which also switches
            the module to training mode), the list ``x``. Otherwise a tuple
            ``(decoded, x)`` where ``decoded`` concatenates the decoded
            predictions of all layers along dimension 1.
        """
        # x = x.copy()  # for profiling
        z = []  # inference output
        self.training |= self.export
        for i in range(self.nl):
            x[i] = self.m[i](self.ia[i](x[i]))  # conv
            x[i] = self.im[i](x[i])
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                # Rebuild the cached grid only when the feature-map size changes.
                if self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i] = self._make_grid(nx, ny).to(x[i].device)

                y = x[i].sigmoid()
                y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

    @staticmethod
    def _make_grid(nx=20, ny=20):
        """Return a float tensor of shape ``(1, 1, ny, nx, 2)`` holding ``(x, y)`` cell indices."""
        ys = torch.arange(ny).view(ny, 1).expand(ny, nx)
        xs = torch.arange(nx).view(1, nx).expand(ny, nx)
        return torch.stack((xs, ys), 2).view(1, 1, ny, nx, 2).float()