headscratchertm committed on
Commit 2cd2753 · 1 Parent(s): f33899f

interpolated frames get generated for video

Files changed (4):
  1. .gitignore +2 -1
  2. frames.py +0 -11
  3. main.py +66 -40
  4. model.py +0 -3
.gitignore CHANGED
@@ -2,4 +2,5 @@
 output
 SuperSloMo.ckpt
 Test.mp4
-Result_Test
+Result_Test
+interpolated_frames
frames.py CHANGED
@@ -1,9 +1,5 @@
 import cv2
 import os
-from PIL import Image
-from torchvision.transforms import transforms, ToTensor
-from torch import tensor
-from torchvision.transforms import ToPILImage,Resize
 import torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
@@ -35,13 +31,6 @@ def downsample(video_path, output_dir, target_fps):
     pass
 
 
-def load_frames(path,size=(128,128)) -> tensor: # converts PIL image to tensor on the GPU
-    image = Image.open(path).convert('RGB')
-    tensor = ToTensor()
-    resized_image=Resize(size)(image)
-    return tensor(resized_image).unsqueeze(0).to(device)
-
-
 
 if __name__ == "__main__": # sets the __name__ variable to __main__ for this script
 
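The 128×128 `load_frames` helper deleted above is superseded by the version in main.py, which resizes every frame to 720×1280 before tensor conversion. A quick shape check of the new loader's contract (a minimal sketch using a blank stand-in image rather than a real frame):

    from PIL import Image
    from torchvision.transforms import transforms, ToTensor, Resize

    transform = transforms.Compose([Resize((720, 1280)), ToTensor()])
    img = Image.new("RGB", (1920, 1080))  # stand-in for a decoded video frame
    tensor = transform(img).unsqueeze(0)  # add the batch dimension, as load_frames does
    print(tensor.shape)  # torch.Size([1, 3, 720, 1280]); ToTensor scales values to [0, 1]

Since every frame goes through the same resize, the tensors reaching `interpolate` and `warp_frames` share one spatial size, so the size-mismatch branch in `warp_frames` should not trigger.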
main.py CHANGED
@@ -1,9 +1,13 @@
+import cv2
 import torch
 from model import UNet
 from PIL import Image
 from torchvision.transforms import transforms, ToTensor
 import torch.nn.functional as F
-
+from torch.cuda.amp import autocast
+import os
+import subprocess
+from torchvision.transforms import Resize
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 def save_frames(tensor, out_path) -> None:
@@ -15,95 +19,117 @@ def normalize_frames(tensor):
     tensor = tensor.squeeze(0).detach().cpu()
     tensor = torch.clamp(tensor, 0.0, 1.0)  # Ensure values are in [0, 1]
     tensor = (tensor * 255).byte()  # Scale to [0, 255]
-    tensor = tensor.permute(1, 2, 0).numpy()  # Convert to [H, W, C]
+    tensor = tensor.permute(1, 2, 0).numpy()  # Convert to [H, W, C] (height, width, channels)
     return tensor
-
+def load_allframes(frame_dir):
+    frames_path = sorted(
+        [os.path.join(frame_dir, f) for f in os.listdir(frame_dir) if f.endswith('.png')]
+    )
+    for frame_path in frames_path:
+        yield load_frames(frame_path)
 def load_frames(image_path) -> torch.Tensor:
+    '''
+    Converts a PIL image (RGB) to a PyTorch tensor and loads it onto the GPU.
+    :param image_path: path to the frame on disk
+    :return: PyTorch tensor of shape [1, C, H, W]
+    '''
     transform = transforms.Compose([
-        ToTensor()  # Converts to [0, 1] range and [C, H, W]
+        Resize((720, 1280)),
+        ToTensor()
     ])
     img = Image.open(image_path).convert("RGB")
-    tensor = transform(img).unsqueeze(0).to(device)  # Add batch dimension
+    tensor = transform(img).unsqueeze(0).to(device)
    return tensor
 
 def time_steps(input_fps, output_fps) -> list[float]:
+    '''
+    Generates the time fractions at which to interpolate between frames A and B.
+    :param input_fps: original video FPS
+    :param output_fps: target output FPS
+    :return: list of intermediate time steps between two frames A and B
+    '''
     if output_fps <= input_fps:
         return []
     k = output_fps // input_fps
     n = k - 1
     return [i / (n + 1) for i in range(1, n + 1)]
-
-def expand_channels(tensor, target):
-    batch_size, current_channels, height, width = tensor.shape
-    if current_channels >= target:
-        return tensor
-    required = target - current_channels
-    extra = torch.zeros(batch_size, required, height, width, device=tensor.device, dtype=tensor.dtype)
-    return torch.cat((tensor, extra), dim=1)
-
-def interpolate(model_FC, model_AT, A, B, input_fps, output_fps):
+def interpolate_video(frames_dir, model_fc, input_fps, output_fps, output_dir):
+    os.makedirs(output_dir, exist_ok=True)
+    count = 0
+    iterator = load_allframes(frames_dir)
+    try:
+        prev_frame = next(iterator)
+        for curr_frame in iterator:
+            interpolated_frames = interpolate(model_fc, prev_frame, curr_frame, input_fps, output_fps)
+            save_frames(prev_frame, os.path.join(output_dir, "frame_{}.png".format(count)))
+            count += 1
+            for frame in interpolated_frames:
+                save_frames(frame[:, :3, :, :], os.path.join(output_dir, "frame_{}.png".format(count)))
+                count += 1
+            prev_frame = curr_frame
+        save_frames(prev_frame, os.path.join(output_dir, "frame_{}.png".format(count)))
+    except StopIteration:
+        print("no more frames")
+
+
+def interpolate(model_FC, A, B, input_fps, output_fps) -> list[torch.Tensor]:
     interval = time_steps(input_fps, output_fps)
-    input_tensor = torch.cat((A, B), dim=1)  # Combine frames A and B
-
+    input_tensor = torch.cat((A, B), dim=1)  # Concatenate frames A and B along the channel axis
     with torch.no_grad():
         flow_output = model_FC(input_tensor)
         flow_forward = flow_output[:, :2, :, :]  # Forward flow
         flow_backward = flow_output[:, 2:4, :, :]  # Backward flow
-
     generated_frames = []
     with torch.no_grad():
         for t in interval:
             t_tensor = torch.tensor([t], dtype=torch.float32).view(1, 1, 1, 1).to(device)
-
-            warped_A = warp_frames(A, flow_forward * t_tensor)
-            warped_B = warp_frames(B, flow_backward * (1 - t_tensor))
-
-            interpolated_frame = warped_A * (1 - t_tensor) + warped_B * t_tensor
+            with autocast():
+                warped_A = warp_frames(A, flow_forward * t_tensor)
+                warped_B = warp_frames(B, flow_backward * (1 - t_tensor))
+                interpolated_frame = warped_A * (1 - t_tensor) + warped_B * t_tensor
             generated_frames.append(interpolated_frame)
-
     return generated_frames
 
 
 def warp_frames(frame, flow):
     b, c, h, w = frame.size()
     _, _, flow_h, flow_w = flow.size()
-
     if h != flow_h or w != flow_w:
         frame = F.interpolate(frame, size=(flow_h, flow_w), mode='bilinear', align_corners=True)
-
     grid_y, grid_x = torch.meshgrid(torch.arange(0, flow_h), torch.arange(0, flow_w), indexing="ij")
     grid_x = grid_x.float().to(device)
     grid_y = grid_y.float().to(device)
-
     flow_x = flow[:, 0, :, :]
     flow_y = flow[:, 1, :, :]
     x = grid_x.unsqueeze(0) + flow_x
     y = grid_y.unsqueeze(0) + flow_y
-
     x = 2.0 * x / (flow_w - 1) - 1.0
     y = 2.0 * y / (flow_h - 1) - 1.0
     grid = torch.stack((x, y), dim=-1)
 
-    warped_frame = F.grid_sample(frame, grid, align_corners=True)
+    warped_frame = F.grid_sample(frame, grid, align_corners=True, mode='bilinear', padding_mode='border')
     return warped_frame
-
-
+def frames_to_video(frame_dir, output_video, fps):
+    frame_pattern = os.path.join(frame_dir, "frame_%d.png")
+    subprocess.run([
+        "ffmpeg", "-framerate", str(fps), "-i", frame_pattern,
+        "-c:v", "libx264", "-pix_fmt", "yuv420p", output_video
+    ])
def solve():
     checkpoint = torch.load("SuperSloMo.ckpt")
     model_FC = UNet(6, 4).to(device)  # Initialize flow computation model
     model_FC.load_state_dict(checkpoint["state_dictFC"])  # Load weights
     model_FC.eval()
-
     model_AT = UNet(20, 5).to(device)  # Initialize auxiliary task model
     model_AT.load_state_dict(checkpoint["state_dictAT"], strict=False)  # Load weights
     model_AT.eval()
-
-    A = load_frames("output/1.png")
-    B = load_frames("output/10.png")
-    interpolated_frames = interpolate(model_FC, model_AT, A, B, 30, 90)
-
-    for index, value in enumerate(interpolated_frames):
-        save_frames(value[:, :3, :, :], f"Result_Test/image{index + 1}.png")  # Save only RGB channels
+    frames_dir = "output"
+    input_fps = 60
+    output_fps = 120
+    output_dir = "interpolated_frames"
+    interpolate_video(frames_dir, model_FC, input_fps, output_fps, output_dir)
+    final_video = "result.mp4"
+    frames_to_video(output_dir, final_video, output_fps)
 
 def main():
     solve()
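For the fps pair wired into `solve()` (60 to 120), `time_steps` emits a single midpoint per frame pair; the arithmetic is easy to sanity-check in isolation:

    def time_steps(input_fps, output_fps) -> list[float]:
        if output_fps <= input_fps:
            return []
        k = output_fps // input_fps
        n = k - 1
        return [i / (n + 1) for i in range(1, n + 1)]

    print(time_steps(60, 120))  # k=2, n=1 -> [0.5]: one interpolated frame per pair
    print(time_steps(30, 90))   # k=3, n=2 -> [0.333..., 0.666...]: two per pair
    print(time_steps(60, 60))   # -> []: no upsampling requested

So for source frames A, B, C, `interpolate_video` writes A, the A/B midpoint, B, the B/C midpoint, then C; at these settings N source frames become 2N-1 output frames.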
 
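A note on the ffmpeg call in `frames_to_video`: `frame_%d.png` is the image2 demuxer's sequence pattern and matches `frame_0.png`, `frame_1.png`, and so on in numeric order (the demuxer's default start number is 0, which lines up with `count` starting at 0 in `interpolate_video`). The subprocess invocation expands to the equivalent of:

    ffmpeg -framerate 120 -i interpolated_frames/frame_%d.png -c:v libx264 -pix_fmt yuv420p result.mp4

`-framerate` is given the output fps so the doubled frame count plays back at the original speed, and `-pix_fmt yuv420p` keeps the H.264 file playable in common players.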
model.py CHANGED
@@ -109,14 +109,11 @@ class up(nn.Module):
         self.conv2 = nn.Conv2d(2 * outChannels, outChannels, 3, stride=1, padding=1)
 
     def forward(self, x, skpCn):
-        # Upsample x
         x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
-        # Match dimensions by cropping the skip connection (skpCn) to match x
         if x.size(-1) != skpCn.size(-1):
             skpCn = skpCn[:, :, :, :x.size(-1)]
         if x.size(-2) != skpCn.size(-2):
             skpCn = skpCn[:, :, :x.size(-2), :]
-        # Concatenate and apply convolutions
         x = F.leaky_relu(self.conv1(x), negative_slope=0.1)
         x = F.leaky_relu(self.conv2(torch.cat((x, skpCn), 1)), negative_slope=0.1)
         return x
 
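The cropping kept in `up.forward` exists because odd spatial sizes do not round-trip through the encoder and decoder: pooling an odd dimension floors it, and upsampling by 2 then lands one pixel short of the corresponding skip connection. A standalone shape check with hypothetical sizes (135 is not from the repo, just an arbitrary odd number):

    import torch
    import torch.nn.functional as F

    skpCn = torch.randn(1, 16, 135, 135)  # encoder skip connection with odd size
    x = torch.randn(1, 32, 67, 67)        # decoder input: 135 pooled by 2 -> floor(67.5) = 67

    x = F.interpolate(x, scale_factor=2, mode='bilinear', align_corners=True)
    print(x.shape)      # torch.Size([1, 32, 134, 134]) -- one pixel short of 135

    # the same crop as up.forward applies before concatenation:
    if x.size(-1) != skpCn.size(-1):
        skpCn = skpCn[:, :, :, :x.size(-1)]
    if x.size(-2) != skpCn.size(-2):
        skpCn = skpCn[:, :, :x.size(-2), :]
    print(skpCn.shape)  # torch.Size([1, 16, 134, 134]) -- now torch.cat((x, skpCn), 1) is valid

Without the crop, `torch.cat` would raise a size-mismatch error on any input whose height or width is not a multiple of the UNet's total downsampling factor.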