Text-to-Image
Diffusers
English
learn_ddpm / model /attn_utils.py
Harshit Agarwal
initial comm
eaefa93
import torch
from torch import nn
import math
import torch.nn.functional as F
class SelfAttention(nn.Module):
    """Pre-norm self-attention + feed-forward block over a 2D feature map.

    Every spatial position of the ``(b, c, h, w)`` input is treated as one
    token whose embedding is the channel vector at that position. The block
    applies LayerNorm -> multi-head self-attention -> residual, followed by
    LayerNorm -> feed-forward MLP -> residual, and returns a tensor with the
    same shape as the input.

    Args:
        chs: Number of input channels; used as the attention embedding size.
        num_heads: Number of attention heads passed to
            ``nn.MultiheadAttention``.
        ffn_expansion: Width multiplier of the hidden layer in the
            feed-forward MLP (hidden size is ``chs * ffn_expansion``).
        dropout: Dropout probability applied to both residual branches
            (a single ``nn.Dropout`` module is shared by the two).
    """

    def __init__(self, chs, num_heads=1, ffn_expansion=4, dropout=0.1):
        super().__init__()
        self.norm = nn.LayerNorm(chs)
        self.attn = nn.MultiheadAttention(embed_dim=chs, num_heads=num_heads, batch_first=True)
        self.ffn = nn.Sequential(
            nn.Linear(chs, chs * ffn_expansion),
            nn.GELU(),
            nn.Linear(chs * ffn_expansion, chs),
        )
        self.norm2 = nn.LayerNorm(chs)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the attention block.

        Args:
            x: Tensor of shape ``(b, c, h, w)`` with ``c == chs``.

        Returns:
            Tensor of shape ``(b, c, h, w)``.
        """
        b, c, h, w = x.shape
        # reshape (not view) so that non-contiguous inputs, e.g. slices of a
        # larger feature map, are accepted instead of raising RuntimeError.
        tokens = x.reshape(b, c, h * w).transpose(1, 2)  # (b, h*w, c)

        # Normalise once and reuse the same tensor for query, key and value.
        normed = self.norm(tokens)
        attn_out, _ = self.attn(normed, normed, normed)
        x_attn = tokens + self.dropout(attn_out)

        ffn_out = self.ffn(self.norm2(x_attn))
        x_out = x_attn + self.dropout(ffn_out)

        # Back to channel-first image layout.
        return x_out.transpose(1, 2).reshape(b, c, h, w)
class CBAM(nn.Module):
    """Channel gate followed by spatial gate, in the style of CBAM.

    The channel gate squeezes the input with global average pooling, passes
    it through a 1x1-conv bottleneck and a sigmoid, and rescales each
    channel. The spatial gate then concatenates the per-pixel channel mean
    and channel max, applies a 7x7 convolution and a sigmoid, and rescales
    each spatial position.

    Args:
        chs: Number of channels of the input feature map.
        reduction: Bottleneck reduction factor for the channel gate. The
            hidden width is ``chs // reduction`` but never less than 1, so
            the module stays usable when ``chs < reduction``.
    """

    def __init__(self, chs, reduction=16):
        super().__init__()
        # Guard against a zero-channel bottleneck for narrow feature maps
        # (e.g. chs=8 with the default reduction of 16).
        hidden = max(chs // reduction, 1)
        self.channel_attn = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(chs, hidden, 1),
            nn.ReLU(),
            nn.Conv2d(hidden, chs, 1),
            nn.Sigmoid(),
        )
        self.spatial_attn = nn.Sequential(
            nn.Conv2d(2, 1, kernel_size=7, padding=3),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` rescaled by the channel gate and then the spatial gate.

        Args:
            x: Tensor of shape ``(b, chs, h, w)``.

        Returns:
            Tensor with the same shape as ``x``.
        """
        # Channel gate: one weight per channel, broadcast over h and w.
        ch_wt = self.channel_attn(x)
        x = x * ch_wt

        # Spatial gate: one weight per pixel, computed from channel statistics.
        avg_pool = torch.mean(x, dim=1, keepdim=True)
        max_pool, _ = torch.max(x, dim=1, keepdim=True)
        sp_wt = self.spatial_attn(torch.cat([avg_pool, max_pool], dim=1))
        return x * sp_wt
class Block_CBAM(nn.Module):
    """Time-conditioned convolutional block with CBAM attention and resampling.

    The block runs conv -> ReLU -> BatchNorm, adds a projected time embedding,
    runs a second conv -> ReLU -> BatchNorm, applies CBAM attention and
    finally resamples with a stride-2 convolution (``up=False``) or a
    stride-2 transposed convolution (``up=True``).

    Args:
        in_ch: Nominal number of input channels. When ``up`` is true the
            first convolution expects ``2 * in_ch`` channels (presumably the
            features concatenated with a skip connection - confirm against
            the caller).
        out_ch: Number of output channels.
        time_emb_dim: Size of the time embedding fed to ``forward``.
        up: Select the upsampling variant of the block.
    """

    def __init__(self, in_ch, out_ch, time_emb_dim, up=False):
        super().__init__()
        self.time_mlp = nn.Linear(time_emb_dim, out_ch)

        # Upsampling blocks take twice the input channels and use a
        # transposed convolution to resample; downsampling blocks use a
        # regular strided convolution. Both use kernel 4, stride 2, padding 1.
        first_in_ch = 2 * in_ch if up else in_ch
        resample_cls = nn.ConvTranspose2d if up else nn.Conv2d
        self.conv1 = nn.Conv2d(first_in_ch, out_ch, kernel_size=3, padding=1)
        self.transform = resample_cls(out_ch, out_ch, kernel_size=4, stride=2, padding=1)

        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=1)
        self.relu = nn.ReLU()
        self.batch_norm1 = nn.BatchNorm2d(out_ch)
        self.batch_norm2 = nn.BatchNorm2d(out_ch)
        self.cbam = CBAM(out_ch)

    def forward(self, x, t):
        """Run the block.

        Args:
            x: Feature map fed to the first convolution.
            t: Time embedding whose last dimension is ``time_emb_dim``
                (assumed to be ``(batch, time_emb_dim)`` - confirm).

        Returns:
            The resampled feature map produced by ``self.transform``.
        """
        feats = self.batch_norm1(self.relu(self.conv1(x)))

        # Project the time embedding to out_ch and append two singleton axes
        # so it broadcasts across the spatial dimensions.
        t_proj = self.relu(self.time_mlp(t))
        feats = feats + t_proj[..., None, None]

        feats = self.batch_norm2(self.relu(self.conv2(feats)))
        attended = self.cbam(feats)
        return self.transform(attended)