Spaces:

AashishNKumar
/

video-quality-enhancement

Running

App Files Files Community

headscratchertm committed on Dec 1, 2024

Commit

816f401

1 Parent(s): ddab39e

added Unet arch from original Repo

Browse files

Files changed (4) hide show

frames.py +17 -5
info.txt +7 -0
main.py +9 -0
model.py +361 -0

frames.py CHANGED Viewed

@@ -1,16 +1,28 @@
 import cv2
 import os
 def extract_frames(url_path,output_dir):
     os.makedirs(output_dir, exist_ok=True)
     frame_count=0
     cap=cv2.VideoCapture(url_path)
-    while cap.isOpened() and frame_count<10:
-        ret,frame=cap.read()
         if not ret:
             break
         frame_name=f"{frame_count}.png"
-        cv2.imwrite(os.path.join(output_dir, frame_name), frame)
         frame_count+=1
     cap.release()
-extract_frames("C:/Users/BRIDGES/Downloads/Video1.mp4","output")
-# this is a test change to merge later

 import cv2
 import os
 def extract_frames(url_path,output_dir):
+    '''
+    Acts as initial feed into the SuperSlomo Model
+    The Frames are stored in an output directory which is then loaded into the SuperSlomo Model.
+    :param url_path:
+    :param output_dir:
+    :return: None
+    '''
     os.makedirs(output_dir, exist_ok=True)
     frame_count=0
     cap=cv2.VideoCapture(url_path)
+    total_frames=int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps=int(cap.get(cv2.CAP_PROP_FPS))
+    while cap.isOpened():
+        ret,frame=cap.read() # frame is a numpy array
         if not ret:
             break
         frame_name=f"{frame_count}.png"
         frame_count+=1
+        cv2.imwrite(os.path.join(output_dir, frame_name), frame)
     cap.release()
def downsample(video_path,output_dir,target_fps):
    """
    Placeholder for frame-rate reduction; not implemented yet.

    The arguments are accepted but ignored, and nothing is read or written.

    :param video_path: unused for now.
    :param output_dir: unused for now.
    :param target_fps: unused for now.
    :return: None
    """
    return None
# Only run the sample extraction when this file is executed directly as a
# script (``__name__`` is "__main__" then), not when it is imported.
if __name__ == "__main__":
    extract_frames("Test.mp4", "output")

info.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+ We need to decide how many frames the output video should have.
+ Assume the input video is 1 minute long at 30 fps and the target is 90 fps.
+ k (upsampling factor) = fps_output / fps_input
+ k = 90 / 30
+ k = 3
+ # The output video will have T (total time in seconds) x fps_output = 60 x 90 = 5400 frames.

main.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import torch
def solve(checkpoint_path="SuperSloMo.ckpt"):
    """
    Load a checkpoint file, print its contents and return it.

    Parameters
    ----------
        checkpoint_path : str
            path of the checkpoint to load. Defaults to "SuperSloMo.ckpt"
            in the current working directory.

    Returns
    -------
        object
            whatever `torch.load` deserialized from the file.
    """
    # NOTE(security): `torch.load` unpickles the file; only load checkpoints
    # obtained from a trusted source.
    # `map_location="cpu"` lets a checkpoint saved on a GPU load on a machine
    # without one.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    # `eval()` only exists on modules. A checkpoint holding plain containers
    # (e.g. a dict of state dicts) has no such method, so guard the call.
    # NOTE(review): presumably this file is a dict of state dicts - confirm.
    if isinstance(checkpoint, torch.nn.Module):
        checkpoint.eval()
    print(checkpoint)
    return checkpoint
def main():
    """Script entry point: hands control straight to `solve`."""
    solve()


# Run `main` only when the file is executed directly, not on import.
if __name__ == "__main__":
    main()

model.py ADDED Viewed

	@@ -0,0 +1,361 @@

+import torch
+import torchvision
+import torchvision.transforms as transforms
+import torch.optim as optim
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
class down(nn.Module):
    """
    Encoder stage used by the UNet class. Layers, in order:

    Average Pooling --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    ...

    Methods
    -------
    forward(x)
        Runs input tensor `x` through the block and returns the result.
    """

    def __init__(self, inChannels, outChannels, filterSize):
        """
        Parameters
        ----------
            inChannels : int
                channels entering the first convolution.
            outChannels : int
                channels leaving the first convolution; the second
                convolution maps `outChannels` to `outChannels`.
            filterSize : int
                side length N of the square N x N convolution kernel.
        """
        super().__init__()
        # With stride 1 this padding keeps H x W unchanged for odd filter
        # sizes, so only the pooling step alters the resolution.
        pad = int((filterSize - 1) / 2)
        self.conv1 = nn.Conv2d(inChannels, outChannels, filterSize, stride=1, padding=pad)
        self.conv2 = nn.Conv2d(outChannels, outChannels, filterSize, stride=1, padding=pad)

    def forward(self, x):
        """
        Runs input tensor `x` through the block.

        Parameters
        ----------
            x : tensor
                input to the NN block.

        Returns
        -------
            tensor
                output of the NN block.
        """
        # 2 x 2 average pooling.
        pooled = F.avg_pool2d(x, 2)
        # Two rounds of convolution followed by Leaky ReLU (slope 0.1).
        hidden = F.leaky_relu(self.conv1(pooled), negative_slope=0.1)
        return F.leaky_relu(self.conv2(hidden), negative_slope=0.1)
class up(nn.Module):
    """
    A class for creating neural network blocks containing layers:

    Bilinear interpolation --> Convolution + Leaky ReLU --> Convolution + Leaky ReLU

    This is used in the UNet Class to create a UNet like NN architecture.

    ...

    Methods
    -------
    forward(x, skpCn)
        Returns output tensor after passing input `x` and the skip
        connection `skpCn` to the neural network block.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of input channels for the first convolutional layer.
            outChannels : int
                number of output channels for the first convolutional layer.
                This is also used for setting input and output channels for
                the second convolutional layer.
        """
        super(up, self).__init__()
        # Initialize convolutional layers.
        self.conv1 = nn.Conv2d(inChannels,  outChannels, 3, stride=1, padding=1)
        # (2 * outChannels) is used for accommodating skip connection.
        self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)

    def forward(self, x, skpCn):
        """
        Returns output tensor after passing input `x` to the neural network
        block.

        Parameters
        ----------
            x : tensor
                input to the NN block.
            skpCn : tensor
                skip connection input to the NN block. Must have
                `outChannels` channels; its height and width set the
                resolution of the output.

        Returns
        -------
            tensor
                output of the NN block.
        """
        # Bilinear upsampling to the skip connection's height and width.
        # When `skpCn` is exactly twice the size of `x` this equals scaling
        # by 2. Targeting the skip size also keeps the concatenation below
        # valid when an odd resolution was floored by pooling on the way down.
        # `align_corners=False` is the library default, stated explicitly.
        x = F.interpolate(x, size=skpCn.shape[2:], mode='bilinear', align_corners=False)
        # Convolution + Leaky ReLU
        x = F.leaky_relu(self.conv1(x), negative_slope = 0.1)
        # Convolution + Leaky ReLU on (`x`, `skpCn`) joined along channels.
        x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope = 0.1)
        return x
class UNet(nn.Module):
    """
    UNet like network following the Super SloMo paper: two full-resolution
    convolutions, five `down` stages, five `up` stages fed by skip
    connections, and a final convolution.

    ...

    Methods
    -------
    forward(x)
        Runs input tensor `x` through the network and returns the result.
    """

    def __init__(self, inChannels, outChannels):
        """
        Parameters
        ----------
            inChannels : int
                number of channels of the UNet input.
            outChannels : int
                number of channels of the UNet output.
        """
        super().__init__()
        # Full-resolution stem.
        self.conv1 = nn.Conv2d(inChannels, 32, 7, stride=1, padding=3)
        self.conv2 = nn.Conv2d(32, 32, 7, stride=1, padding=3)
        # Encoder.
        self.down1 = down(32, 64, 5)
        self.down2 = down(64, 128, 3)
        self.down3 = down(128, 256, 3)
        self.down4 = down(256, 512, 3)
        self.down5 = down(512, 512, 3)
        # Decoder.
        self.up1 = up(512, 512)
        self.up2 = up(512, 256)
        self.up3 = up(256, 128)
        self.up4 = up(128, 64)
        self.up5 = up(64, 32)
        # Output projection.
        self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)

    def forward(self, x):
        """
        Runs input tensor `x` through the network.

        Parameters
        ----------
            x : tensor
                input to the UNet.

        Returns
        -------
            tensor
                output of the UNet.
        """
        x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
        x = F.leaky_relu(self.conv2(x), negative_slope=0.1)

        # Stack of encoder activations; the decoder pops them deepest-first.
        skips = [x]
        for encoder in (self.down1, self.down2, self.down3, self.down4):
            skips.append(encoder(skips[-1]))

        # Bottleneck: its output is not reused as a skip connection.
        x = self.down5(skips[-1])

        for decoder in (self.up1, self.up2, self.up3, self.up4, self.up5):
            x = decoder(x, skips.pop())

        return F.leaky_relu(self.conv3(x), negative_slope=0.1)
class backWarp(nn.Module):
    """
    A class for creating a backwarping object.

    This is used for backwarping to an image:

    Given optical flow from frame I0 to I1 --> F_0_1 and frame I1,
    it generates I0 <-- backwarp(F_0_1, I1).

    ...

    Methods
    -------
    forward(img, flow)
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.
    """

    def __init__(self, W, H, device):
        """
        Parameters
        ----------
            W : int
                width of the image.
            H : int
                height of the image.
            device : device
                computation device (cpu/cuda).
        """
        super(backWarp, self).__init__()
        # Pixel coordinate grids, each of shape (H, W): gridX holds column
        # indices and gridY holds row indices.
        gridX, gridY = np.meshgrid(np.arange(W), np.arange(H))
        self.W = W
        self.H = H
        self.gridX = torch.tensor(gridX, requires_grad=False, device=device)
        self.gridY = torch.tensor(gridY, requires_grad=False, device=device)

    def forward(self, img, flow):
        """
        Returns output tensor after passing input `img` and `flow` to the
        backwarping block.
        I0  = backwarp(I1, F_0_1)

        Parameters
        ----------
            img : tensor
                frame I1.
            flow : tensor
                optical flow from I0 and I1: F_0_1. Channel 0 is read as the
                horizontal displacement and channel 1 as the vertical one,
                both in pixels.

        Returns
        -------
            tensor
                frame I0.
        """
        # Extract horizontal and vertical flows.
        u = flow[:, 0, :, :]
        v = flow[:, 1, :, :]
        # Absolute sampling positions: pixel coordinate + displacement.
        x = self.gridX.unsqueeze(0).expand_as(u).float() + u
        y = self.gridY.unsqueeze(0).expand_as(v).float() + v
        # Rescale to the [-1, 1] range expected by `grid_sample`.
        # NOTE(review): this divides by W / H rather than (W - 1) / (H - 1),
        # so a zero flow is only approximately the identity. It is left as is
        # in case existing weights were trained with it - confirm.
        x = 2*(x/self.W - 0.5)
        y = 2*(y/self.H - 0.5)
        # stacking X and Y
        grid = torch.stack((x,y), dim=3)
        # Sample pixels using bilinear interpolation.
        # `align_corners=True` is passed explicitly: since PyTorch 1.3 the
        # implicit default is False, under which the grid above samples half
        # a pixel up/left and blends zero padding into the border.
        imgOut = torch.nn.functional.grid_sample(img, grid, align_corners=True)
        return imgOut
# Array of `t` values for the 7 intermediate frames between reference frames
# I0 and I1, evenly spaced from 0.125 to 0.875.
t = np.linspace(0.125, 0.875, 7)


def _timeSteps(indices):
    """Returns the `t` value of every sample, looked up by frame index."""
    # `.cpu()` is needed before `.numpy()` when `indices` is on a CUDA device;
    # it is a no-op for tensors already on the CPU.
    ind = indices.detach().cpu().numpy()
    return t[ind]


def _coeffToTensor(coeff, device):
    """Returns `coeff` as a float32 tensor of shape (N, 1, 1, 1) on `device`."""
    # The trailing singleton dimensions let the coefficient multiply a whole
    # (N, C, H, W) batch by broadcasting.
    return torch.as_tensor(coeff, dtype=torch.float32).reshape(-1, 1, 1, 1).to(device)


def getFlowCoeff (indices, device):
    """
    Gets flow coefficients used for calculating intermediate optical
    flows from optical flows between I0 and I1: F_0_1 and F_1_0.

    F_t_0 = C00 x F_0_1 + C01 x F_1_0
    F_t_1 = C10 x F_0_1 + C11 x F_1_0

    where,
    C00 = -(1 - t) x t
    C01 = t x t
    C10 = (1 - t) x (1 - t)
    C11 = -t x (1 - t)

    Parameters
    ----------
        indices : tensor
            integer indices (0 to 6) corresponding to the intermediate frame
            positions of all samples in the batch. May live on any device.
        device : device
                computation device (cpu/cuda).

    Returns
    -------
        tuple of tensor
            coefficients C00, C01, C10, C11, each of shape (N, 1, 1, 1).
    """
    ts = _timeSteps(indices)
    # C00 and C11 are the same expression: -(1 - t) x t.
    C11 = C00 = - (1 - ts) * ts
    C01 = ts * ts
    C10 = (1 - ts) * (1 - ts)
    return (
        _coeffToTensor(C00, device),
        _coeffToTensor(C01, device),
        _coeffToTensor(C10, device),
        _coeffToTensor(C11, device),
    )


def getWarpCoeff (indices, device):
    """
    Gets coefficients used for calculating final intermediate
    frame `It_gen` from backwarped images using flows F_t_0 and F_t_1.

    It_gen = (C0 x V_t_0 x g_I_0_F_t_0 + C1 x V_t_1 x g_I_1_F_t_1) / (C0 x V_t_0 + C1 x V_t_1)

    where,
    C0 = 1 - t
    C1 = t

    V_t_0, V_t_1 --> visibility maps
    g_I_0_F_t_0, g_I_1_F_t_1 --> backwarped intermediate frames

    Parameters
    ----------
        indices : tensor
            integer indices (0 to 6) corresponding to the intermediate frame
            positions of all samples in the batch. May live on any device.
        device : device
                computation device (cpu/cuda).

    Returns
    -------
        tuple of tensor
            coefficients C0 and C1, each of shape (N, 1, 1, 1).
    """
    ts = _timeSteps(indices)
    C0 = 1 - ts
    C1 = ts
    return _coeffToTensor(C0, device), _coeffToTensor(C1, device)