|
import torch |
|
import torchvision |
|
import torchvision.transforms as transforms |
|
import torch.optim as optim |
|
import torch.nn as nn |
|
import torch.nn.functional as F |
|
import numpy as np |
|
|
|
|
|
class down(nn.Module):
    """
    Encoder block of the UNet: halves the spatial resolution, then applies
    two convolutions.

    Average Pooling --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    This is used in the UNet Class to create a UNet like NN architecture.

    ...

    Methods
    -------
    forward(x)
        Returns output tensor after passing input `x` to the neural network
        block.
    """

    def __init__(self, inChannels, outChannels, filterSize):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the first convolutional layer.
            outChannels : int
                number of output channels for the first convolutional layer.
                The second convolutional layer maps `outChannels` to
                `outChannels`.
            filterSize : int
                side length N of the N x N convolution filter used by both
                layers.
        """

        super(down, self).__init__()

        # Padding of (N - 1) / 2 keeps the spatial size unchanged for odd N
        # at stride 1.
        samePad = int((filterSize - 1) / 2)
        self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize,
                               stride=1, padding=samePad)
        self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize,
                               stride=1, padding=samePad)

    def forward(self, x):
        """
        Returns output tensor after passing input `x` to the neural network
        block.

        Parameters
        ----------
            x : tensor
                input to the NN block.

        Returns
        -------
            tensor
                output of the NN block.
        """

        # 2 x 2 average pooling halves height and width.
        out = F.avg_pool2d(x, 2)
        # Both convolutions are followed by the same leaky ReLU.
        for conv in (self.conv1, self.conv2):
            out = F.leaky_relu(conv(out), negative_slope=0.1)
        return out
|
|
|
class up(nn.Module):
    """
    Decoder block of the UNet: doubles the spatial resolution, then applies
    two convolutions, the second one on the concatenation with a skip
    connection.

    Bilinear interpolation --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    This is used in the UNet Class to create a UNet like NN architecture.

    ...

    Methods
    -------
    forward(x, skpCn)
        Returns output tensor after passing input `x` to the neural network
        block.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the first convolutional layer.
            outChannels : int
                number of output channels for the first convolutional layer.
                The second convolutional layer takes `2 * outChannels`
                channels (features concatenated with the skip connection)
                and outputs `outChannels` channels.
        """

        super(up, self).__init__()

        self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1)
        # Input is twice as wide because the skip connection is concatenated
        # along the channel axis before this layer.
        self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)

    def forward(self, x, skpCn):
        """
        Returns output tensor after passing input `x` to the neural network
        block.

        Parameters
        ----------
            x : tensor
                input to the NN block.
            skpCn : tensor
                skip connection input to the NN block. It is cropped (from
                the bottom/right) to the upsampled size of `x` when it is
                larger.

        Returns
        -------
            tensor
                output of the NN block.
        """

        upsampled = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)

        # Trim the skip connection to the upsampled height/width; slicing is
        # a no-op when the sizes already agree.
        height, width = upsampled.shape[-2:]
        skip = skpCn[:, :, :height, :width]

        features = F.leaky_relu(self.conv1(upsampled), negative_slope=0.1)
        merged = torch.cat((features, skip), 1)
        return F.leaky_relu(self.conv2(merged), negative_slope=0.1)
|
|
|
|
|
class UNet(nn.Module):
    """
    A class for creating UNet like architecture as specified by the
    Super SloMo paper.

    ...

    Methods
    -------
    forward(x, time_steps=None)
        Returns output tensor after passing input `x` (optionally extended
        by a constant time-step channel) to the neural network.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the UNet. When `forward` is
                called with `time_steps`, one extra channel is concatenated
                to `x` before the first convolution, so `inChannels` must
                already include it.
            outChannels : int
                number of output channels for the UNet.
        """

        super(UNet, self).__init__()

        # Initialize neural network blocks.
        self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3)
        self.conv2 = nn.Conv2d(32, 32, 7, stride=1, padding=3)
        self.down1 = down(32, 64, 5)
        self.down2 = down(64, 128, 3)
        self.down3 = down(128, 256, 3)
        self.down4 = down(256, 512, 3)
        self.down5 = down(512, 512, 3)
        self.up1 = up(512, 512)
        self.up2 = up(512, 256)
        self.up3 = up(256, 128)
        self.up4 = up(128, 64)
        self.up5 = up(64, 32)
        self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)

    def forward(self, x, time_steps=None):
        """
        Returns output tensor after passing input `x` to the neural network.

        Parameters
        ----------
            x : tensor
                input to the UNet.
            time_steps : tensor, optional
                one value per sample (or a single value shared by the whole
                batch). When given, it is broadcast to a constant 1-channel
                map with the spatial size of `x` and concatenated to `x`
                along the channel axis.

        Returns
        -------
            tensor
                output of the UNet.
        """

        # `is not None` instead of truthiness: the truth value of a tensor
        # with more than one element is ambiguous (raises), and a time step
        # of exactly 0 must not be silently skipped.
        if time_steps is not None:
            time_steps = torch.as_tensor(time_steps, dtype=x.dtype, device=x.device)
            time_map = time_steps.reshape(-1, 1, 1, 1).expand(
                x.size(0), 1, x.size(2), x.size(3))
            # The concatenation result must be assigned back, otherwise the
            # time channel never reaches the network.
            x = torch.cat((x, time_map), 1)

        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        s1 = F.leaky_relu(self.conv2(x), negative_slope=0.1)

        # Encoder: keep every intermediate output as a skip connection.
        s2 = self.down1(s1)
        s3 = self.down2(s2)
        s4 = self.down3(s3)
        s5 = self.down4(s4)
        x = self.down5(s5)

        # Decoder: each stage consumes the skip connection of matching depth.
        x = self.up1(x, s5)
        x = self.up2(x, s4)
        x = self.up3(x, s3)
        x = self.up4(x, s2)
        x = self.up5(x, s1)

        x = F.leaky_relu(self.conv3(x), negative_slope=0.1)
        return x
|
|
|
|
|
class backWarp(nn.Module):
    """
    A class for creating a backwarping object.

    This is used for backwarping to an image:

    Given optical flow from frame I0 to I1 --> F_0_1 and frame I1,
    it generates I0 <-- backwarp(F_0_1, I1).

    ...

    Methods
    -------
    forward(img, flow)
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.
    """

    def __init__(self, W, H, device):
        """
        Parameters
        ----------
            W : int
                width of the image.
            H : int
                height of the image.
            device : device
                computation device (cpu/cuda).
        """

        super(backWarp, self).__init__()

        # Pixel-coordinate grids of shape (H, W): gridX holds the column
        # index of every pixel, gridY the row index.
        gridX, gridY = np.meshgrid(np.arange(W), np.arange(H))
        self.W = W
        self.H = H
        self.gridX = torch.tensor(gridX, requires_grad=False, device=device)
        self.gridY = torch.tensor(gridY, requires_grad=False, device=device)

    def forward(self, img, flow):
        """
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.
        I0 = backwarp(I1, F_0_1)

        Parameters
        ----------
            img : tensor
                frame I1.
            flow : tensor
                optical flow from I0 and I1: F_0_1. Channel 0 is the
                horizontal displacement, channel 1 the vertical one, both
                in pixels.

        Returns
        -------
            tensor
                frame I0.
        """

        # Extract horizontal and vertical flows.
        u = flow[:, 0, :, :]
        v = flow[:, 1, :, :]

        # Absolute sampling positions in pixels; the grids are cast to the
        # dtype/device of the flow so the sum is well defined.
        x = self.gridX.to(u).unsqueeze(0).expand_as(u) + u
        y = self.gridY.to(v).unsqueeze(0).expand_as(v) + v

        # Map pixel centres 0 .. W-1 (resp. 0 .. H-1) onto [-1, 1]. This is
        # the convention of grid_sample with align_corners=True, so a zero
        # flow reproduces `img` exactly. max(..., 1) guards a 1-pixel axis.
        x = 2 * x / max(self.W - 1, 1) - 1
        y = 2 * y / max(self.H - 1, 1) - 1

        # Stack X and Y into the (N, H, W, 2) layout grid_sample expects.
        grid = torch.stack((x, y), dim=3)

        # Sample pixels using bilinear interpolation.
        imgOut = torch.nn.functional.grid_sample(img, grid, align_corners=True)
        return imgOut
|
|
|
|
|
|
|
|
|
# Time positions of the 7 intermediate frames: 0.125, 0.25, ..., 0.875.
# The `indices` arguments of the functions below index into this array.
t = np.linspace(0.125, 0.875, 7)


def _frameTimes(indices):
    """Return the entries of `t` selected by `indices` as a flat numpy array."""
    # .cpu() is needed because .numpy() raises for tensors that live on a
    # CUDA device; reshape(-1) lets a 0-d (single) index work as a batch of 1.
    ind = torch.as_tensor(indices).detach().cpu().numpy().reshape(-1)
    return t[ind]


def _batchCoeff(values, device):
    """Return `values` as a float32 tensor of shape (N, 1, 1, 1) on `device`."""
    return torch.as_tensor(values, dtype=torch.float32).view(-1, 1, 1, 1).to(device)


def getFlowCoeff(indices, device):
    """
    Gets flow coefficients used for calculating intermediate optical
    flows from optical flows between I0 and I1: F_0_1 and F_1_0.

    F_t_0 = C00 x F_0_1 + C01 x F_1_0
    F_t_1 = C10 x F_0_1 + C11 x F_1_0

    where,
    C00 = -(1 - t) x t
    C01 = t x t
    C10 = (1 - t) x (1 - t)
    C11 = -t x (1 - t)

    Parameters
    ----------
        indices : tensor
            indices corresponding to the intermediate frame positions
            of all samples in the batch.
        device : device
            computation device (cpu/cuda).

    Returns
    -------
        tensor
            coefficients C00, C01, C10, C11, each a float32 tensor of
            shape (N, 1, 1, 1) on `device`.
    """

    times = _frameTimes(indices)
    remaining = 1 - times

    C00 = -remaining * times
    C01 = times * times
    C10 = remaining * remaining
    # C11 has the same value as C00; it is converted separately below so the
    # two returned tensors do not share storage.
    C11 = -times * remaining
    return (_batchCoeff(C00, device), _batchCoeff(C01, device),
            _batchCoeff(C10, device), _batchCoeff(C11, device))


def getWarpCoeff(indices, device):
    """
    Gets coefficients used for calculating final intermediate
    frame `It_gen` from backwarped images using flows F_t_0 and F_t_1.

    It_gen = (C0 x V_t_0 x g_I_0_F_t_0 + C1 x V_t_1 x g_I_1_F_t_1) / (C0 x V_t_0 + C1 x V_t_1)

    where,
    C0 = 1 - t
    C1 = t

    V_t_0, V_t_1 --> visibility maps
    g_I_0_F_t_0, g_I_1_F_t_1 --> backwarped intermediate frames

    Parameters
    ----------
        indices : tensor
            indices corresponding to the intermediate frame positions
            of all samples in the batch.
        device : device
            computation device (cpu/cuda).

    Returns
    -------
        tensor
            coefficients C0 and C1, each a float32 tensor of shape
            (N, 1, 1, 1) on `device`.
    """

    times = _frameTimes(indices)
    C0 = 1 - times
    C1 = times
    return _batchCoeff(C0, device), _batchCoeff(C1, device)