Spaces:
Sleeping
Sleeping
File size: 3,307 Bytes
d1bcb75 53d646f d1bcb75 f997506 53d646f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import torch
import torch.nn as nn
class AttentionBlock(nn.Module):
    """Two convolutions followed by spatial self-attention and LayerNorm.

    The input feature map is convolved twice (ReLU in between). Every spatial
    position of the result is then treated as one token whose embedding is the
    channel vector at that position; the tokens go through multi-head
    self-attention and are layer-normalised over the channel axis. The output
    is returned in ``(batch, out_channels, height, width)`` layout, where
    height and width are those of the second convolution's output.

    Args:
        in_channels: Number of channels of the input feature map.
        out_channels: Number of output channels. Also used as the attention
            embedding size, so it must be divisible by ``num_heads``.
        kernel_size: Kernel size shared by both convolutions.
        padding: Padding shared by both convolutions.
        num_heads: Number of attention heads (defaults to the previously
            hard-coded value of 8).
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, padding=1,
                 num_heads=8):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels,
                               kernel_size=kernel_size, padding=padding)
        self.conv2 = nn.Conv2d(out_channels, out_channels,
                               kernel_size=kernel_size, padding=padding)
        # batch_first=True: the attention module expects (batch, seq, embed).
        self.attn = nn.MultiheadAttention(
            out_channels, num_heads=num_heads, batch_first=True)
        self.norm = nn.LayerNorm(out_channels)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Apply the block to a ``(batch, in_channels, height, width)`` tensor.

        Returns:
            A tensor of shape ``(batch, out_channels, h, w)`` where ``h`` and
            ``w`` are the spatial dimensions after the two convolutions.
        """
        x = self.conv1(x)
        x = self.activation(x)
        x = self.conv2(x)
        b, c, h, w = x.size()

        # (b, c, h, w) -> (b, h*w, c). A transpose (not a plain view/reshape)
        # is required so that each token really is the channel vector of one
        # spatial position, and so the layout matches batch_first=True;
        # otherwise attention would mix different samples of the batch.
        tokens = x.flatten(2).transpose(1, 2)
        attn_output, _ = self.attn(tokens, tokens, tokens)

        # LayerNorm normalises the last axis, which is the channel axis here.
        tokens = self.norm(attn_output)

        # (b, h*w, c) -> (b, c, h, w); reshape copies the transposed data.
        return tokens.transpose(1, 2).reshape(b, c, h, w)
class UNet(nn.Module):
    """Encoder/decoder network with an LSTM + attention bottleneck.

    The encoder applies five conv+ReLU stages separated by four 2x2 max-pools
    (3 -> 32 -> 64 -> 128 -> 256 -> 512 channels). The bottleneck runs an LSTM
    over the spatial positions of the deepest feature map and then an
    ``AttentionBlock``. Before every transposed convolution the decoder
    concatenates the encoder activation of the matching resolution onto its
    input (which is why each decoder layer takes twice the channels of the
    tensor flowing into it), and it ends with a sigmoid over 3 channels.

    Input height and width must be divisible by 16: the four pooling steps
    halve the resolution and the decoder doubles it back four times, so any
    other size makes the skip concatenation fail on mismatched shapes.
    """

    def __init__(self):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(),
        )
        self.lstm = nn.LSTM(512, 512, batch_first=True)
        self.attn_block = AttentionBlock(512, 512)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(1024, 256, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(512, 128, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(256, 64, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 32, kernel_size=2, stride=2),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 3, kernel_size=1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run the network on a ``(batch, 3, height, width)`` tensor.

        Returns:
            A ``(batch, 3, height, width)`` tensor with values in ``[0, 1]``
            (the last layer is a sigmoid).
        """
        # Keep the activated output of every encoder stage, shallowest first.
        # Pooled tensors are not reused by the decoder, so they are not stored.
        skip_connections = []
        for layer in self.encoder:
            x = layer(x)
            if isinstance(layer, nn.ReLU):
                skip_connections.append(x)

        batch_size, channels, height, width = x.size()
        # (b, c, h, w) -> (b, h*w, c): the LSTM consumes one channel vector
        # per spatial position, visited in row-major order. A transpose is
        # needed here; a bare view would feed it arbitrary memory chunks.
        x = x.flatten(2).transpose(1, 2)
        x, _ = self.lstm(x)
        # The LSTM hidden size equals the channel count, so the sequence can
        # be folded back into the original (b, c, h, w) layout.
        x = x.transpose(1, 2).reshape(batch_size, channels, height, width)
        x = self.attn_block(x)

        for layer in self.decoder:
            if isinstance(layer, nn.ConvTranspose2d):
                # Deepest remaining skip first; it has the same resolution
                # and channel count as the current decoder tensor.
                x = torch.cat((x, skip_connections.pop()), dim=1)
            x = layer(x)
        return x
|