Spaces:

AashishNKumar
/

video-quality-enhancement

Running

App Files Files Community

video-quality-enhancement / model.py

headscratchertm

interpolated frames get generated for video

2cd2753 6 months ago

raw

history blame

10.3 kB

	import torch
	import torchvision
	import torchvision.transforms as transforms
	import torch.optim as optim
	import torch.nn as nn
	import torch.nn.functional as F
	import numpy as np


	class down(nn.Module):
	    """
	    Downsampling block used to build the UNet-like architecture.

	    Layers, in order: Average Pooling, then Convolution + Leaky ReLU,
	    then Convolution + Leaky ReLU.

	    ...

	    Methods
	    -------
	    forward(x)
	        Returns the output tensor obtained by passing input `x` through
	        the block.
	    """

	    def __init__(self, inChannels, outChannels, filterSize):
	        """
	        Parameters
	        ----------
	        inChannels : int
	            number of input channels of the first convolution.
	        outChannels : int
	            number of output channels of the first convolution. The
	            second convolution maps `outChannels` to `outChannels`.
	        filterSize : int
	            side length N of the N x N kernel used by both convolutions.
	        """
	        super().__init__()
	        # Both convolutions use stride 1 and the same padding,
	        # int((N - 1) / 2), which keeps the spatial size when N is odd.
	        half = (filterSize - 1) / 2
	        pad = int(half)
	        self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize, stride=1, padding=pad)
	        self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize, stride=1, padding=pad)

	    def forward(self, x):
	        """
	        Pool `x` by a factor of 2 and apply the two convolutions.

	        Parameters
	        ----------
	        x : tensor
	            input to the block.

	        Returns
	        -------
	        tensor
	            output of the block.
	        """
	        # 2 x 2 average pooling.
	        pooled = F.avg_pool2d(x, 2)
	        # Each convolution is followed by a Leaky ReLU with slope 0.1.
	        hidden = F.leaky_relu(self.conv1(pooled), negative_slope=0.1)
	        return F.leaky_relu(self.conv2(hidden), negative_slope=0.1)

	class up(nn.Module):
	    """
	    Upsampling block used to build the UNet-like architecture.

	    Layers, in order: Bilinear interpolation, then Convolution + Leaky
	    ReLU, then Convolution + Leaky ReLU (the second convolution also
	    receives the skip connection).

	    ...

	    Methods
	    -------
	    forward(x, skpCn)
	        Returns the output tensor obtained by passing input `x` and the
	        skip connection `skpCn` through the block.
	    """

	    def __init__(self, inChannels, outChannels):
	        """
	        Parameters
	        ----------
	        inChannels : int
	            number of input channels of the first convolution.
	        outChannels : int
	            number of output channels of both convolutions. The second
	            convolution takes `2 * outChannels` input channels.
	        """
	        super().__init__()
	        # 3 x 3 convolutions, stride 1, padding 1.
	        self.conv1 = nn.Conv2d(inChannels, outChannels, 3, stride=1, padding=1)
	        # The input is twice as wide because the skip connection is
	        # concatenated along the channel axis before this layer.
	        self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)

	    def forward(self, x, skpCn):
	        """
	        Upsample `x` by 2, fuse it with `skpCn` and return the result.

	        Parameters
	        ----------
	        x : tensor
	            input to the block.
	        skpCn : tensor
	            skip connection, concatenated with the convolved `x` along
	            dimension 1. It is cropped (keeping the leading rows and
	            columns) when it is larger than the upsampled `x`.

	        Returns
	        -------
	        tensor
	            output of the block.
	        """
	        upsampled = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
	        rows, cols = upsampled.size(-2), upsampled.size(-1)
	        if skpCn.size(-1) != cols or skpCn.size(-2) != rows:
	            # Trim the skip connection to the upsampled size.
	            skpCn = skpCn[:, :, :rows, :cols]
	        features = F.leaky_relu(self.conv1(upsampled), negative_slope=0.1)
	        fused = torch.cat((features, skpCn), 1)
	        return F.leaky_relu(self.conv2(fused), negative_slope=0.1)


	class UNet(nn.Module):
	    """
	    A class for creating UNet like architecture as specified by the
	    Super SloMo paper.

	    The network is two 7 x 7 convolutions, five `down` blocks, five `up`
	    blocks fed with the matching encoder outputs as skip connections,
	    and a final 3 x 3 convolution. Every convolution is followed by a
	    Leaky ReLU with negative slope 0.1.

	    ...

	    Methods
	    -------
	    forward(x, time_steps=None)
	        Returns output tensor after passing input `x` (optionally with an
	        extra time channel) to the neural network.
	    """

	    def __init__(self, inChannels, outChannels):
	        """
	        Parameters
	        ----------
	        inChannels : int
	            number of input channels for the UNet. When `forward` is
	            called with `time_steps`, one extra channel is appended to
	            `x`, so `inChannels` must count that channel too.
	        outChannels : int
	            number of output channels for the UNet.
	        """
	        super(UNet, self).__init__()
	        # Initialize neural network blocks.
	        self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3)
	        self.conv2 = nn.Conv2d(32, 32, 7, stride=1, padding=3)
	        self.down1 = down(32, 64, 5)
	        self.down2 = down(64, 128, 3)
	        self.down3 = down(128, 256, 3)
	        self.down4 = down(256, 512, 3)
	        self.down5 = down(512, 512, 3)
	        self.up1 = up(512, 512)
	        self.up2 = up(512, 256)
	        self.up3 = up(256, 128)
	        self.up4 = up(128, 64)
	        self.up5 = up(64, 32)
	        self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)

	    def forward(self, x, time_steps=None):
	        """
	        Returns output tensor after passing input `x` to the neural network.

	        Parameters
	        ----------
	        x : tensor
	            input to the UNet; indexed as (batch, channel, height, width).
	        time_steps : tensor or number, optional
	            one value per sample (or a single value shared by the whole
	            batch). When given, it is broadcast to a constant
	            height x width plane and appended to `x` as one extra channel.

	        Returns
	        -------
	        tensor
	            output of the UNet.
	        """
	        # `is not None` rather than truthiness: a multi-element tensor has
	        # no truth value, and a time step of 0 is still a valid input.
	        if time_steps is not None:
	            time_steps = torch.as_tensor(time_steps, dtype=x.dtype, device=x.device)
	            time_plane = time_steps.reshape(-1, 1, 1, 1).expand(x.size(0), 1, x.size(2), x.size(3))
	            # torch.cat returns a new tensor; the result must be kept.
	            x = torch.cat((x, time_plane), 1)

	        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
	        s1 = F.leaky_relu(self.conv2(x), negative_slope=0.1)
	        # Encoder: keep every intermediate output for the skip connections.
	        s2 = self.down1(s1)
	        s3 = self.down2(s2)
	        s4 = self.down3(s3)
	        s5 = self.down4(s4)
	        x = self.down5(s5)
	        # Decoder: each stage is paired with the encoder output of the
	        # same depth.
	        x = self.up1(x, s5)
	        x = self.up2(x, s4)
	        x = self.up3(x, s3)
	        x = self.up4(x, s2)
	        x = self.up5(x, s1)
	        x = F.leaky_relu(self.conv3(x), negative_slope=0.1)
	        return x


	class backWarp(nn.Module):
	    """
	    A class for creating a backwarping object.

	    This is used for backwarping to an image:

	    Given optical flow from frame I0 to I1 (F_0_1) and frame I1,
	    it generates I0 = backwarp(I1, F_0_1).

	    ...

	    Methods
	    -------
	    forward(img, flow)
	        Returns output tensor after passing input `img` and `flow` to the
	        backwarping block.
	    """

	    def __init__(self, W, H, device):
	        """
	        Parameters
	        ----------
	        W : int
	            width of the image.
	        H : int
	            height of the image.
	        device : device
	            computation device (cpu/cuda).
	        """
	        super(backWarp, self).__init__()
	        # Pixel coordinate grids of shape (H, W): gridX holds the column
	        # index of every pixel, gridY the row index.
	        gridX, gridY = np.meshgrid(np.arange(W), np.arange(H))
	        self.W = W
	        self.H = H
	        self.gridX = torch.tensor(gridX, requires_grad=False, device=device)
	        self.gridY = torch.tensor(gridY, requires_grad=False, device=device)

	    def forward(self, img, flow):
	        """
	        Returns output tensor after passing input `img` and `flow` to the
	        backwarping block.
	        I0 = backwarp(I1, F_0_1)

	        Parameters
	        ----------
	        img : tensor
	            frame I1.
	        flow : tensor
	            optical flow from I0 and I1: F_0_1. Channel 0 is read as the
	            horizontal flow and channel 1 as the vertical flow; its
	            height and width must match the H and W given at construction.

	        Returns
	        -------
	        tensor
	            frame I0.
	        """
	        # Extract horizontal and vertical flows.
	        u = flow[:, 0, :, :]
	        v = flow[:, 1, :, :]
	        # Absolute sampling positions = pixel position + displacement.
	        x = self.gridX.unsqueeze(0).expand_as(u).float() + u
	        y = self.gridY.unsqueeze(0).expand_as(v).float() + v
	        # Scale to the [-1, 1] range expected by grid_sample.
	        x = 2 * (x / self.W - 0.5)
	        y = 2 * (y / self.H - 0.5)
	        # grid_sample expects the last dimension to be (x, y).
	        grid = torch.stack((x, y), dim=3)
	        # Sample pixels using bilinear interpolation. align_corners is set
	        # explicitly: the default (False since PyTorch 1.3) combined with
	        # the normalisation above samples every pixel half a pixel up/left
	        # and fades the top/left borders towards the zero padding.
	        # NOTE(review): with align_corners=True a zero flow samples pixel
	        # i at i * (W - 1) / W, i.e. still not an exact identity; presumably
	        # this matches what the flow networks were trained with - confirm
	        # before changing the normalisation itself.
	        imgOut = torch.nn.functional.grid_sample(img, grid, align_corners=True)
	        return imgOut


	# Time positions of the 7 intermediate frames between reference frames
	# I0 and I1: 1/8, 2/8, ..., 7/8.
	t = np.arange(1, 8, dtype=np.float64) / 8

	def getFlowCoeff(indices, device):
	    """
	    Gets flow coefficients used for calculating intermediate optical
	    flows from optical flows between I0 and I1: F_0_1 and F_1_0.

	    F_t_0 = C00 x F_0_1 + C01 x F_1_0
	    F_t_1 = C10 x F_0_1 + C11 x F_1_0

	    where,
	    C00 = -(1 - t) x t
	    C01 = t x t
	    C10 = (1 - t) x (1 - t)
	    C11 = -t x (1 - t)

	    Parameters
	    ----------
	    indices : tensor
	        indices corresponding to the intermediate frame positions
	        of all samples in the batch; they index the module-level
	        array `t`. May live on any device.
	    device : device
	        computation device (cpu/cuda) for the returned tensors.

	    Returns
	    -------
	    tuple of tensor
	        coefficients C00, C01, C10, C11, each with one entry per index
	        and shape (N, 1, 1, 1).
	    """
	    # .cpu() is required before .numpy(): a tensor that lives on a CUDA
	    # device cannot be converted to a numpy array directly.
	    ind = indices.detach().cpu().numpy()
	    frac = t[ind]

	    def column(values):
	        # One coefficient per sample, shaped (N, 1, 1, 1). torch.tensor
	        # copies, so the returned tensors never share memory.
	        column_tensor = torch.tensor(values, dtype=torch.get_default_dtype())
	        return column_tensor.reshape(-1, 1, 1, 1).to(device)

	    # C00 and C11 are the same expression, -(1 - t) x t.
	    cross = -(1 - frac) * frac
	    C00 = column(cross)
	    C01 = column(frac * frac)
	    C10 = column((1 - frac) * (1 - frac))
	    C11 = column(cross)
	    return C00, C01, C10, C11

	def getWarpCoeff(indices, device):
	    """
	    Gets coefficients used for calculating final intermediate
	    frame `It_gen` from backwarped images using flows F_t_0 and F_t_1.

	    It_gen = (C0 x V_t_0 x g_I_0_F_t_0 + C1 x V_t_1 x g_I_1_F_t_1) / (C0 x V_t_0 + C1 x V_t_1)

	    where,
	    C0 = 1 - t
	    C1 = t

	    V_t_0, V_t_1 are the visibility maps and
	    g_I_0_F_t_0, g_I_1_F_t_1 are the backwarped intermediate frames.

	    Parameters
	    ----------
	    indices : tensor
	        indices corresponding to the intermediate frame positions
	        of all samples in the batch; they index the module-level
	        array `t`. May live on any device.
	    device : device
	        computation device (cpu/cuda) for the returned tensors.

	    Returns
	    -------
	    tuple of tensor
	        coefficients C0 and C1, each with one entry per index and
	        shape (N, 1, 1, 1).
	    """
	    # .cpu() is required before .numpy(): a tensor that lives on a CUDA
	    # device cannot be converted to a numpy array directly.
	    ind = indices.detach().cpu().numpy()
	    frac = t[ind]

	    def column(values):
	        # One coefficient per sample, shaped (N, 1, 1, 1).
	        column_tensor = torch.tensor(values, dtype=torch.get_default_dtype())
	        return column_tensor.reshape(-1, 1, 1, 1).to(device)

	    C0 = column(1 - frac)
	    C1 = column(frac)
	    return C0, C1