import torch.nn.functional as F
from torch import nn
class PreactResBlock(nn.Sequential):
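    """Pre-activation residual block: (GroupNorm -> GELU -> 3x3 Conv) twice, with an identity skip.

    Assumes `dim` is a multiple of 16, since GroupNorm is given `dim // 16` groups.
    """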
def __init__(self, dim):
super().__init__(
nn.GroupNorm(dim // 16, dim),
nn.GELU(),
nn.Conv2d(dim, dim, 3, padding=1),
nn.GroupNorm(dim // 16, dim),
nn.GELU(),
nn.Conv2d(dim, dim, 3, padding=1),
)
def forward(self, x):
return x + super().forward(x)
class UNetBlock(nn.Module):
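    """Single U-Net stage: optional resampling, a channel-changing 3x3 conv, and two residual blocks.

    scale_factor > 1 upsamples the input (decoder stage); scale_factor < 1
    downsamples the returned output (encoder stage); 1.0 keeps the resolution.
    """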
def __init__(self, input_dim, output_dim=None, scale_factor=1.0):
super().__init__()
if output_dim is None:
output_dim = input_dim
self.pre_conv = nn.Conv2d(input_dim, output_dim, 3, padding=1)
self.res_block1 = PreactResBlock(output_dim)
self.res_block2 = PreactResBlock(output_dim)
        # Identity by default; a block either upsamples (decoder) or downsamples (encoder).
        self.downsample = self.upsample = nn.Identity()
        if scale_factor > 1:
            self.upsample = nn.Upsample(scale_factor=scale_factor)
        elif scale_factor < 1:
            # nn.Upsample with scale_factor < 1 wraps F.interpolate and performs
            # nearest-neighbor downsampling, despite the module's name.
            self.downsample = nn.Upsample(scale_factor=scale_factor)
def forward(self, x, h=None):
"""
Args:
x: (b c h w), last output
h: (b c h w), skip output
Returns:
o: (b c h w), output
s: (b c h w), skip output
"""
x = self.upsample(x)
if h is not None:
assert x.shape == h.shape, f"{x.shape} != {h.shape}"
x = x + h
x = self.pre_conv(x)
x = self.res_block1(x)
x = self.res_block2(x)
return self.downsample(x), x
class UNet(nn.Module):
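    """Symmetric U-Net with additive skip connections.

    `num_blocks` encoder stages double the channels and halve the resolution,
    `num_middle_blocks` stages operate at the bottleneck, and a mirrored set of
    decoder stages restores the input resolution. Arbitrary input sizes are
    handled by padding to a multiple of the total stride and cropping back.
    """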
def __init__(self, input_dim, output_dim, hidden_dim=16, num_blocks=4, num_middle_blocks=2):
super().__init__()
self.input_dim = input_dim
self.output_dim = output_dim
self.input_proj = nn.Conv2d(input_dim, hidden_dim, 3, padding=1)
self.encoder_blocks = nn.ModuleList(
[
UNetBlock(input_dim=hidden_dim * 2**i, output_dim=hidden_dim * 2 ** (i + 1), scale_factor=0.5)
for i in range(num_blocks)
]
)
self.middle_blocks = nn.ModuleList(
[UNetBlock(input_dim=hidden_dim * 2**num_blocks) for _ in range(num_middle_blocks)]
)
self.decoder_blocks = nn.ModuleList(
[
UNetBlock(input_dim=hidden_dim * 2 ** (i + 1), output_dim=hidden_dim * 2**i, scale_factor=2)
for i in reversed(range(num_blocks))
]
)
self.head = nn.Sequential(
nn.Conv2d(hidden_dim, hidden_dim, 3, padding=1),
nn.GELU(),
nn.Conv2d(hidden_dim, output_dim, 1),
)
    @property
    def scale_factor(self):
        # Total spatial stride of the encoder: each block halves the resolution.
        return 2 ** len(self.encoder_blocks)
    def pad_to_fit(self, x):
        """
        Right- and bottom-pad x so that h and w are multiples of the total stride.
        Args:
            x: (b c h w), input
        Returns:
            x: (b c h' w'), padded input
        """
        hpad = (self.scale_factor - x.shape[2] % self.scale_factor) % self.scale_factor
        wpad = (self.scale_factor - x.shape[3] % self.scale_factor) % self.scale_factor
        # F.pad orders 2D padding as (left, right, top, bottom).
        return F.pad(x, (0, wpad, 0, hpad))
def forward(self, x):
"""
Args:
x: (b c h w), input
Returns:
o: (b c h w), output
"""
        shape = x.shape
        x = self.pad_to_fit(x)
        x = self.input_proj(x)
        # Encoder: collect pre-downsampling features for the skip connections.
        s_list = []
        for block in self.encoder_blocks:
            x, s = block(x)
            s_list.append(s)
        for block in self.middle_blocks:
            x, _ = block(x)
        # Decoder: consume the skips in reverse order (deepest first).
        for block, s in zip(self.decoder_blocks, reversed(s_list)):
            x, _ = block(x, s)
        x = self.head(x)
        # Crop away the padding to restore the original spatial size.
        x = x[..., : shape[2], : shape[3]]
        return x
def test(self, shape=(3, 512, 256)):
        import ptflops  # third-party dependency: pip install ptflops
macs, params = ptflops.get_model_complexity_info(
self,
shape,
as_strings=True,
print_per_layer_stat=True,
verbose=True,
)
print(f"macs: {macs}")
print(f"params: {params}")
def main():
model = UNet(3, 3)
model.test()
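    # Optional sanity check (a minimal sketch; the 500x300 size is an arbitrary
    # example, chosen to not be a multiple of the model's total stride): verify
    # that pad_to_fit plus the final crop yield an output matching the input shape.
    import torch

    x = torch.randn(1, 3, 500, 300)
    y = model(x)
    assert y.shape == x.shape, f"{y.shape} != {x.shape}"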
if __name__ == "__main__":
main()