Spaces:

AashishNKumar
/

video-quality-enhancement

Running

@@ -1,6 +1,13 @@
 import cv2
 import os
-def extract_frames(url_path,output_dir):
     '''
     Acts as initial feed into the SuperSlomo Model
     The Frames are stored in an output directory which is then loaded into the SuperSlomo Model.
@@ -9,20 +16,43 @@ def extract_frames(url_path,output_dir):
     :return: None
     '''
     os.makedirs(output_dir, exist_ok=True)
-    frame_count=0
-    cap=cv2.VideoCapture(url_path)
-    total_frames=int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-    fps=int(cap.get(cv2.CAP_PROP_FPS))
     while cap.isOpened():
-        ret,frame=cap.read() # frame is a numpy array
         if not ret:
             break
-        frame_name=f"{frame_count}.png"
-        frame_count+=1
         cv2.imwrite(os.path.join(output_dir, frame_name), frame)
     cap.release()
-def downsample(video_path,output_dir,target_fps):
     pass
-if __name__=="__main__": # sets the __name__ variable to __main__ for this script
-    extract_frames("Test.mp4","output")

 import cv2
 import os
+from PIL import Image
+from torchvision.transforms import transforms, ToTensor
+from torch import tensor
+from torchvision.transforms import ToPILImage,Resize
+import torch
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def extract_frames(url_path, output_dir) -> int:
    '''
    Split a video into numbered PNG frames and report its frame rate.

    Acts as the initial feed into the SuperSlomo model: every decoded frame is
    written to ``output_dir`` as ``0.png``, ``1.png``, ... in decode order, and
    the directory is then loaded by the model code.

    :param url_path: path or URL handed to ``cv2.VideoCapture``
    :param output_dir: directory that receives the frames; created if missing
    :return: the frame rate reported by OpenCV, truncated to an int
        (NOTE(review): OpenCV appears to report 0 for a source it could not
        open, in which case no frames are written -- confirm and handle in
        the caller)
    '''
    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(url_path)
    try:
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()  # frame is a numpy array
            if not ret:
                # End of stream (or a decode failure): stop extracting.
                break
            cv2.imwrite(os.path.join(output_dir, f"{frame_count}.png"), frame)
            frame_count += 1
    finally:
        # Release the capture even if writing a frame raised.
        cap.release()
    return fps
def downsample(video_path, output_dir, target_fps):
    # Placeholder: not implemented yet. Calling it does nothing and returns None.
    pass
def load_frames(path, size=(128, 128)) -> torch.Tensor:
    '''
    Load an image file as a batched tensor on the active device.

    The image is converted to RGB, resized to ``size`` and turned into a
    tensor with ``ToTensor``; a leading batch dimension is then added.

    :param path: image file to read
    :param size: target size passed to ``torchvision.transforms.Resize``
    :return: the image tensor with a batch dimension of 1, moved to ``device``
        (the GPU only when CUDA is available, otherwise the CPU)
    '''
    # The context manager closes the file handle deterministically; convert()
    # returns an independent in-memory copy that stays valid afterwards.
    with Image.open(path) as source:
        rgb_image = source.convert('RGB')
    resized_image = Resize(size)(rgb_image)
    return ToTensor()(resized_image).unsqueeze(0).to(device)
def save_frames(Tensor, output_path) -> None:  # Tensor to image
    '''
    Save an interpolated frame as an image file.

    :param Tensor: image tensor with a leading batch dimension of 1; it is
        squeezed, moved to the CPU and converted with ``ToPILImage``
    :param output_path: destination file; missing parent directories are
        created so a fresh checkout can write results without manual setup
    :return: None
    '''
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # detach() is a no-op for tensors produced under no_grad, and keeps the
    # conversion from failing on tensors that still track gradients.
    image = ToPILImage()(Tensor.detach().squeeze(0).cpu())
    image.save(output_path)
if __name__ == "__main__":  # true only when this file is run as a script, not when it is imported
    extract_frames("Test.mp4", "output")

info.txt CHANGED Viewed

	@@ -8,3 +8,6 @@
8	Need to atach Unet arch
9
10

 Need to attach Unet arch
+Interpolation Factor(k)=output fps/inputFps
+Number of frames Required between 2 frames(n)=k-1
+Time Step=1/(n+1), 2/(n+1), ..., n/(n+1)

main.py CHANGED Viewed

@@ -1,24 +1,54 @@
 import torch
 from model import UNet
 from PIL import Image
 from torchvision.transforms import transforms,ToTensor
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-def load_frames(path):
-    image=Image.open(path).convert('RGB')
-    tensor=ToTensor()
-    return tensor(image).unsqueeze(0).to(device)
 def solve():
     checkpoint=torch.load("SuperSloMo.ckpt")
     model_FC=UNet(6,4) # initialize ARCH
     model_FC=model_FC.to(device)# reassign model tensors
     model_FC.load_state_dict(checkpoint["state_dictFC"]) # loading all weights from model
     model_AT=UNet(20,5)
-    model_AT.load_state_dict(checkpoint["state_dictAT"])
     model_AT=model_AT.to(device)
     model_AT.eval()
     model_FC.eval()
 def main():
     solve()

 import torch
 from model import UNet
+from frames import load_frames,save_frames
 from PIL import Image
 from torchvision.transforms import transforms,ToTensor
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def time_steps(input_fps, output_fps) -> list[float]:
    """
    Return the intermediate time positions needed to go from one fps to another.

    With interpolation factor ``k = output_fps // input_fps``, ``n = k - 1``
    frames are inserted between each pair of source frames, at the evenly
    spaced positions ``1/(n+1), 2/(n+1), ..., n/(n+1)`` (all strictly between
    0 and 1).

    :param input_fps: frame rate of the source video
    :param output_fps: desired frame rate
    :return: the list of time steps; empty when ``output_fps`` is not greater
        than ``input_fps`` or when the integer factor is 1
    :raises ZeroDivisionError: if ``input_fps`` is 0 and ``output_fps`` is positive
    """
    if output_fps <= input_fps:
        return []
    k = output_fps // input_fps
    # n + 1 == k, so each step is i / k. The parentheses matter:
    # ``i / n + 1`` would evaluate as ``(i / n) + 1`` and land outside (0, 1).
    return [i / k for i in range(1, k)]
def expand_channels(tensor, target):
    """
    Zero-pad a 4-D tensor along dim 1 until it has ``target`` channels.

    :param tensor: tensor whose shape unpacks into four dimensions
    :param target: desired size of dim 1
    :return: ``tensor`` itself when it already has at least ``target``
        channels; otherwise a new tensor with zero-filled channels appended,
        on the same device and with the same dtype
    """
    n, channels, h, w = tensor.shape
    missing = target - channels
    if missing <= 0:
        return tensor
    padding = torch.zeros(
        (n, missing, h, w),
        dtype=tensor.dtype,
        device=tensor.device,
    )
    return torch.cat([tensor, padding], dim=1)
def interpolate(model_FC, model_AT, A, B, input_fps, output_fps) -> list[torch.Tensor]:
    """
    Generate the intermediate frames between two frames ``A`` and ``B``.

    ``A`` and ``B`` are concatenated along dim 1 and passed through
    ``model_FC``; its output is zero-padded to 20 channels and fed to
    ``model_AT`` once per time step returned by ``time_steps``.

    :param model_FC: first network, called on the concatenated frame pair
    :param model_AT: second network, called as ``model_AT(features, t)``
    :param A: first frame tensor
    :param B: second frame tensor, concatenable with ``A`` along dim 1
    :param input_fps: frame rate of the source video
    :param output_fps: desired frame rate
    :return: one ``model_AT`` output per time step, in order; empty when no
        intermediate frames are required
    """
    interval = time_steps(input_fps, output_fps)
    input_tensor = torch.cat((A, B), dim=1)
    generated_frames = []
    # Inference only: a single no_grad scope covers both networks.
    with torch.no_grad():
        flow_output = expand_channels(model_FC(input_tensor), 20)
        for step in interval:
            # Shape (1, 1); created on the same device as the features so the
            # call does not depend on a module-level device matching the inputs.
            inter_tensor = torch.tensor(
                [[step]], dtype=torch.float32, device=flow_output.device
            )
            generated_frames.append(model_AT(flow_output, inter_tensor))
    return generated_frames
 def solve():
     checkpoint=torch.load("SuperSloMo.ckpt")
     model_FC=UNet(6,4) # initialize ARCH
     model_FC=model_FC.to(device)# reassign model tensors
     model_FC.load_state_dict(checkpoint["state_dictFC"]) # loading all weights from model
     model_AT=UNet(20,5)
+    model_AT.load_state_dict(checkpoint["state_dictAT"],strict=False)
     model_AT=model_AT.to(device)
     model_AT.eval()
     model_FC.eval()
+    A=load_frames("output/1.png")
+    B=load_frames("output/69.png")
+    interpolated_frames=interpolate(model_FC,model_AT,A,B,60,120)
+    print(interpolated_frames)
+    for index,value in enumerate(interpolated_frames):
+        save_frames(value[:,:3,:,:],"Result_Test/image{}.png".format(index+1))
def main():
    """Entry point: run the interpolation demo implemented in solve()."""
    solve()

model.py CHANGED Viewed

@@ -178,7 +178,7 @@ class UNet(nn.Module):
         self.up5   = up(64, 32)
         self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)
-    def forward(self, x):
         """
         Returns output tensor after passing input `x` to the neural network.
@@ -192,6 +192,9 @@ class UNet(nn.Module):
             tensor
                 output of the UNet.
         """
         x  = F.leaky_relu(self.conv1(x), negative_slope = 0.1)

         self.up5   = up(64, 32)
         self.conv3 = nn.Conv2d(32, outChannels, 3, stride=1, padding=1)
+    def forward(self, x,time_steps=None):
         """
         Returns output tensor after passing input `x` to the neural network.
             tensor
                 output of the UNet.
         """
+        if time_steps:
+            time_steps = time_steps.view(-1,1,1,1).expand(-1,1,x.size(2),x.size(3))
+            torch.cat((x,time_steps),1)
         x  = F.leaky_relu(self.conv1(x), negative_slope = 0.1)