import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import numpy as np


class down(nn.Module):
    """
    A class for creating neural network blocks containing layers:

    Average Pooling --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    This is used in the UNet Class to create a UNet like NN architecture.

    ...

    Methods
    -------
    forward(x)
        Returns output tensor after passing input `x` to the neural network
        block.
    """

    def __init__(self, inChannels, outChannels, filterSize):
        """
        Parameters
        ----------
        inChannels : int
            number of input channels for the first convolutional layer.
        outChannels : int
            number of output channels for the first convolutional layer.
            This is also used as input and output channels for the second
            convolutional layer.
        filterSize : int
            filter size for the convolution filter. Input N would create
            an N x N filter.
        """
        super(down, self).__init__()
        # Initialize convolutional layers.
        self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize,
                               stride=1, padding=int((filterSize - 1) / 2))
        self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize,
                               stride=1, padding=int((filterSize - 1) / 2))

    def forward(self, x):
        """
        Returns output tensor after passing input `x` to the neural network
        block.

        Parameters
        ----------
        x : tensor
            input to the NN block.

        Returns
        -------
        tensor
            output of the NN block.
        """
        # Average pooling with kernel size 2 (2 x 2).
        x = F.avg_pool2d(x, 2)
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv2(x), negative_slope=0.1)
        return x


class up(nn.Module):
    """
    A class for creating neural network blocks containing layers:

    Bilinear interpolation --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    This is used in the UNet Class to create a UNet like NN architecture.

    ...

    Methods
    -------
    forward(x, skpCn)
        Returns output tensor after passing input `x` and the skip connection
        `skpCn` to the neural network block.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
        inChannels : int
            number of input channels for the first convolutional layer.
        outChannels : int
            number of output channels for the first convolutional layer.
            This is also used for setting input and output channels for the
            second convolutional layer.
        """
        super(up, self).__init__()
        # Initialize convolutional layers.
        self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1)
        # (2 * outChannels) is used for accommodating the skip connection.
        self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)

    def forward(self, x, skpCn):
        """
        Returns output tensor after passing input `x` and the skip connection
        `skpCn` to the neural network block.

        Parameters
        ----------
        x : tensor
            input to the NN block.
        skpCn : tensor
            skip connection input to the NN block.

        Returns
        -------
        tensor
            output of the NN block.
        """
        # Bilinear interpolation with scaling 2.
        x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
        # Crop the skip connection if it is larger than the upsampled tensor
        # (this happens when a pooled spatial dimension was odd).
        if x.size(-1) != skpCn.size(-1):
            skpCn = skpCn[:, :, :, :x.size(-1)]
        if x.size(-2) != skpCn.size(-2):
            skpCn = skpCn[:, :, :x.size(-2), :]
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        # Convolution + Leaky ReLU on the concatenation of the upsampled
        # features and the skip connection.
        x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)
        return x


class UNet(nn.Module):
    """
    A class for creating a UNet like architecture as specified by the
    Super SloMo paper.

    ...

    Methods
    -------
    forward(x, time_steps=None)
        Returns output tensor after passing input `x` to the neural network.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
        inChannels : int
            number of input channels for the UNet.
        outChannels : int
            number of output channels for the UNet.
        """
        super(UNet, self).__init__()
        # Initialize neural network blocks.
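        # Encoder: two 7x7 convolutions at full resolution (32 channels),
        # followed by five `down` blocks that halve the spatial resolution
        # while widening the channels 32 -> 64 -> 128 -> 256 -> 512 -> 512.
        # Decoder: five `up` blocks mirror the encoder, doubling the spatial
        # resolution and concatenating the matching encoder output (skip
        # connection) at each level, before a final 3x3 convolution maps the
        # 32 decoder channels to `outChannels`.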
        self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3)
        self.conv2 = nn.Conv2d(32, 32, 7, stride=1, padding=3)
        self.down1 = down(32, 64, 5)
        self.down2 = down(64, 128, 3)
        self.down3 = down(128, 256, 3)
        self.down4 = down(256, 512, 3)
        self.down5 = down(512, 512, 3)
        self.up1 = up(512, 512)
        self.up2 = up(512, 256)
        self.up3 = up(256, 128)
        self.up4 = up(128, 64)
        self.up5 = up(64, 32)
        self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)

    def forward(self, x, time_steps=None):
        """
        Returns output tensor after passing input `x` to the neural network.

        Parameters
        ----------
        x : tensor
            input to the UNet.
        time_steps : tensor, optional
            per-sample interpolation times. When given, they are broadcast to
            the spatial size of `x` and concatenated as an extra input
            channel, so `inChannels` must account for this additional channel.

        Returns
        -------
        tensor
            output of the UNet.
        """
        if time_steps is not None:
            # Broadcast t to an H x W plane and append it as an extra channel.
            time_steps = time_steps.view(-1, 1, 1, 1).expand(-1, 1, x.size(2), x.size(3))
            x = torch.cat((x, time_steps), 1)
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        s1 = F.leaky_relu(self.conv2(x), negative_slope=0.1)
        s2 = self.down1(s1)
        s3 = self.down2(s2)
        s4 = self.down3(s3)
        s5 = self.down4(s4)
        x = self.down5(s5)
        x = self.up1(x, s5)
        x = self.up2(x, s4)
        x = self.up3(x, s3)
        x = self.up4(x, s2)
        x = self.up5(x, s1)
        x = F.leaky_relu(self.conv3(x), negative_slope=0.1)
        return x


class backWarp(nn.Module):
    """
    A class for creating a backwarping object.

    This is used for backwarping to an image:

    Given optical flow from frame I0 to I1 --> F_0_1 and frame I1,
    it generates I0 <-- backwarp(F_0_1, I1).

    ...

    Methods
    -------
    forward(img, flow)
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.
    """

    def __init__(self, W, H, device):
        """
        Parameters
        ----------
        W : int
            width of the image.
        H : int
            height of the image.
        device : device
            computation device (cpu/cuda).
        """
        super(backWarp, self).__init__()
        # Create a grid of pixel coordinates.
        gridX, gridY = np.meshgrid(np.arange(W), np.arange(H))
        self.W = W
        self.H = H
        self.gridX = torch.tensor(gridX, requires_grad=False, device=device)
        self.gridY = torch.tensor(gridY, requires_grad=False, device=device)

    def forward(self, img, flow):
        """
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.

        I0 = backwarp(I1, F_0_1)

        Parameters
        ----------
        img : tensor
            frame I1.
        flow : tensor
            optical flow from I0 to I1: F_0_1.

        Returns
        -------
        tensor
            frame I0.
        """
        # Extract horizontal and vertical flows.
        u = flow[:, 0, :, :]
        v = flow[:, 1, :, :]
        x = self.gridX.unsqueeze(0).expand_as(u).float() + u
        y = self.gridY.unsqueeze(0).expand_as(v).float() + v
        # Normalize sampling coordinates to the range -1 to 1.
        x = 2 * (x / self.W - 0.5)
        y = 2 * (y / self.H - 0.5)
        # Stack X and Y coordinates into a sampling grid.
        grid = torch.stack((x, y), dim=3)
        # Sample pixels using bilinear interpolation. `align_corners=True`
        # matches the pre-PyTorch-1.3 default grid_sample behavior.
        imgOut = torch.nn.functional.grid_sample(img, grid, align_corners=True)
        return imgOut


# Creating an array of `t` values for the 7 intermediate frames between
# reference frames I0 and I1.
t = np.linspace(0.125, 0.875, 7)


def getFlowCoeff(indices, device):
    """
    Gets flow coefficients used for calculating intermediate optical flows
    from optical flows between I0 and I1: F_0_1 and F_1_0.

    F_t_0 = C00 x F_0_1 + C01 x F_1_0
    F_t_1 = C10 x F_0_1 + C11 x F_1_0

    where,
    C00 = -(1 - t) x t
    C01 = t x t
    C10 = (1 - t) x (1 - t)
    C11 = -t x (1 - t)

    Parameters
    ----------
    indices : tensor
        indices corresponding to the intermediate frame positions of all
        samples in the batch.
    device : device
        computation device (cpu/cuda).

    Returns
    -------
    tensor
        coefficients C00, C01, C10, C11.
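
    Notes
    -----
    Substituting the coefficients gives the intermediate-flow approximation
    used in Super SloMo, which assumes locally linear motion between I0 and I1:

        F_t_0 = -(1 - t) * t * F_0_1 + t * t * F_1_0
        F_t_1 = (1 - t) * (1 - t) * F_0_1 - t * (1 - t) * F_1_0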
""" # Convert indices tensor to numpy array ind = indices.detach().numpy() C11 = C00 = - (1 - (t[ind])) * (t[ind]) C01 = (t[ind]) * (t[ind]) C10 = (1 - (t[ind])) * (1 - (t[ind])) return torch.Tensor(C00)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C01)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C10)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C11)[None, None, None, :].permute(3, 0, 1, 2).to(device) def getWarpCoeff (indices, device): """ Gets coefficients used for calculating final intermediate frame `It_gen` from backwarped images using flows F_t_0 and F_t_1. It_gen = (C0 x V_t_0 x g_I_0_F_t_0 + C1 x V_t_1 x g_I_1_F_t_1) / (C0 x V_t_0 + C1 x V_t_1) where, C0 = 1 - t C1 = t V_t_0, V_t_1 --> visibility maps g_I_0_F_t_0, g_I_1_F_t_1 --> backwarped intermediate frames Parameters ---------- indices : tensor indices corresponding to the intermediate frame positions of all samples in the batch. device : device computation device (cpu/cuda). Returns ------- tensor coefficients C0 and C1. """ # Convert indices tensor to numpy array ind = indices.detach().numpy() C0 = 1 - t[ind] C1 = t[ind] return torch.Tensor(C0)[None, None, None, :].permute(3, 0, 1, 2).to(device), torch.Tensor(C1)[None, None, None, :].permute(3, 0, 1, 2).to(device)