AashishNKumar committed
Commit 702c069 · 1 Parent(s): 977bcc2

add SuperSloMo.ckpt

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitignore +9 -2
  2. main.py +156 -63
  3. output_frames/frame_0.png +0 -0
  4. output_frames/frame_1.png +0 -0
  5. output_frames/frame_10.png +0 -0
  6. output_frames/frame_100.png +0 -0
  7. output_frames/frame_101.png +0 -0
  8. output_frames/frame_102.png +0 -0
  9. output_frames/frame_103.png +0 -0
  10. output_frames/frame_104.png +0 -0
  11. output_frames/frame_105.png +0 -0
  12. output_frames/frame_106.png +0 -0
  13. output_frames/frame_107.png +0 -0
  14. output_frames/frame_108.png +0 -0
  15. output_frames/frame_109.png +0 -0
  16. output_frames/frame_11.png +0 -0
  17. output_frames/frame_110.png +0 -0
  18. output_frames/frame_111.png +0 -0
  19. output_frames/frame_112.png +0 -0
  20. output_frames/frame_113.png +0 -0
  21. output_frames/frame_114.png +0 -0
  22. output_frames/frame_115.png +0 -0
  23. output_frames/frame_116.png +0 -0
  24. output_frames/frame_117.png +0 -0
  25. output_frames/frame_118.png +0 -0
  26. output_frames/frame_119.png +0 -0
  27. output_frames/frame_12.png +0 -0
  28. output_frames/frame_120.png +0 -0
  29. output_frames/frame_13.png +0 -0
  30. output_frames/frame_14.png +0 -0
  31. output_frames/frame_15.png +0 -0
  32. output_frames/frame_16.png +0 -0
  33. output_frames/frame_17.png +0 -0
  34. output_frames/frame_18.png +0 -0
  35. output_frames/frame_19.png +0 -0
  36. output_frames/frame_2.png +0 -0
  37. output_frames/frame_20.png +0 -0
  38. output_frames/frame_21.png +0 -0
  39. output_frames/frame_22.png +0 -0
  40. output_frames/frame_23.png +0 -0
  41. output_frames/frame_24.png +0 -0
  42. output_frames/frame_25.png +0 -0
  43. output_frames/frame_26.png +0 -0
  44. output_frames/frame_27.png +0 -0
  45. output_frames/frame_28.png +0 -0
  46. output_frames/frame_29.png +0 -0
  47. output_frames/frame_3.png +0 -0
  48. output_frames/frame_30.png +0 -0
  49. output_frames/frame_31.png +0 -0
  50. output_frames/frame_32.png +0 -0
.gitignore CHANGED
@@ -1,6 +1,6 @@
 .idea
 output
-SuperSloMo.ckpt
+#SuperSloMo.ckpt
 Test.mp4
 Result_Test
 interpolated_frames
@@ -14,4 +14,11 @@ result2.mp4
 result3.mp4
 result4.mp4
 result5.mp4
-result6.mp4
+result6.mp4
+
+
+**/__pycache__/
+.ropeproject/
+.gitattributes
+
+.venv
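The key change above is that SuperSloMo.ckpt is no longer ignored (the entry is commented out), which is what lets this commit add the checkpoint itself. The repository's LFS configuration is not visible in this truncated view, but large binaries on the Hugging Face Hub are normally tracked with Git LFS; a typical sequence, sketched here in the same subprocess style main.py uses for ffmpeg (an assumed workflow, not part of this commit):

    import subprocess

    # Assumed, not shown in this commit: register *.ckpt with Git LFS so the
    # checkpoint is stored as an LFS pointer, then stage it for commit.
    subprocess.run(["git", "lfs", "track", "*.ckpt"], check=True)
    subprocess.run(["git", "add", ".gitattributes", "SuperSloMo.ckpt"], check=True)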
main.py CHANGED
@@ -1,82 +1,108 @@
-import cv2
-import torch
-from model import UNet
-from PIL import Image
 from torchvision.transforms import transforms, ToTensor
-import torch.nn.functional as F
+from torchvision.transforms import Resize
 from torch.cuda.amp import autocast
-import os
+import torch.nn.functional as F
+from PIL import Image
+import gradio as gr
 import subprocess
-from torchvision.transforms import Resize
+import os
+import torch
+import cv2
+
+from model import UNet
+from frames import extract_frames
+
+
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
+
 def save_frames(tensor, out_path) -> None:
     image = normalize_frames(tensor)
     image = Image.fromarray(image)
     image.save(out_path)
 
+
 def normalize_frames(tensor):
     tensor = tensor.squeeze(0).detach().cpu()
     tensor = torch.clamp(tensor, 0.0, 1.0)  # Ensure values are in [0, 1]
     tensor = (tensor * 255).byte()  # Scale to [0, 255]
-    tensor = tensor.permute(1, 2, 0).numpy()  # Convert to [H, W, C] height width channels
+    tensor = tensor.permute(
+        1, 2, 0
+    ).numpy()  # Convert to [H, W, C] height width channels
     return tensor
+
+
 def laod_allframes(frame_dir):
     frames_path = sorted(
-        [os.path.join(frame_dir, f) for f in os.listdir(frame_dir) if f.endswith('.png')],
-        key=lambda x: int(os.path.splitext(os.path.basename(x))[0].split('_')[-1])
+        [
+            os.path.join(frame_dir, f)
+            for f in os.listdir(frame_dir)
+            if f.endswith(".png")
+        ],
+        key=lambda x: int(os.path.splitext(os.path.basename(x))[0].split("_")[-1]),
     )
     print(frames_path)
     for frame_path in frames_path:
         yield load_frames(frame_path)
-def load_frames(image_path)->torch.Tensor:
-    '''
+
+
+def load_frames(image_path) -> torch.Tensor:
+    """
     Converts the PIL image(RGB) to a pytorch Tensor and loads into GPU
     :params image_path
     :return: pytorch tensor
-    '''
-    transform = transforms.Compose([
-        Resize((720,1280)),
-        ToTensor()
-    ])
+    """
+    transform = transforms.Compose([Resize((720, 1280)), ToTensor()])
     img = Image.open(image_path).convert("RGB")
     tensor = transform(img).unsqueeze(0).to(device)
     return tensor
 
+
 def time_steps(input_fps, output_fps) -> list[float]:
-    '''
+    """
     Generates Time intervals to interpolate between frames A and B
     :param input_fps: Video FPS(Original)
     :param output_fps: Target FPS(Output)
     :return: List of intermediate FPS required between 2 Frames A and B
-    '''
+    """
     if output_fps <= input_fps:
         return []
     k = output_fps // input_fps
     n = k - 1
     return [i / (n + 1) for i in range(1, n + 1)]
-def interpolate_video(frames_dir,model_fc,input_fps,ouput_fps,output_dir):
+
+
+def interpolate_video(frames_dir, model_fc, input_fps, ouput_fps, output_dir):
     os.makedirs(output_dir, exist_ok=True)
-    count=0
-    iterator=laod_allframes(frames_dir)
+    count = 0
+    iterator = laod_allframes(frames_dir)
     try:
-        prev_frame=next(iterator)
+        prev_frame = next(iterator)
         for curr_frame in iterator:
-            interpolated_frames=interpolate(model_fc,prev_frame,curr_frame,input_fps,ouput_fps)
-            save_frames(prev_frame,os.path.join(output_dir,"frame_{}.png".format(count)))
-            count+=1
+            interpolated_frames = interpolate(
+                model_fc, prev_frame, curr_frame, input_fps, ouput_fps
+            )
+            save_frames(
+                prev_frame, os.path.join(output_dir, "frame_{}.png".format(count))
+            )
+            count += 1
             for frame in interpolated_frames:
-                save_frames(frame[:,:3,:,:],os.path.join(output_dir,"frame_{}.png".format(count)))
-                count+=1
-            prev_frame=curr_frame
-        save_frames(prev_frame,os.path.join(output_dir,"frame_{}.png".format(count)))
+                save_frames(
+                    frame[:, :3, :, :],
+                    os.path.join(output_dir, "frame_{}.png".format(count)),
+                )
+                count += 1
+            prev_frame = curr_frame
+        save_frames(prev_frame, os.path.join(output_dir, "frame_{}.png".format(count)))
     except StopIteration:
         print("no more Frames")
 
 
-def interpolate(model_FC, A, B, input_fps, output_fps)-> list[torch.Tensor]:
+def interpolate(model_FC, A, B, input_fps, output_fps) -> list[torch.Tensor]:
     interval = time_steps(input_fps, output_fps)
-    input_tensor = torch.cat((A, B), dim=1)  # Concatenate Frame A and B to Compare difference
+    input_tensor = torch.cat(
+        (A, B), dim=1
+    )  # Concatenate Frame A and B to Compare difference
     with torch.no_grad():
         flow_output = model_FC(input_tensor)
         flow_forward = flow_output[:, :2, :, :]  # Forward flow
@@ -84,7 +110,9 @@ def interpolate(model_FC, A, B, input_fps, output_fps)-> list[torch.Tensor]:
     generated_frames = []
     with torch.no_grad():
         for t in interval:
-            t_tensor = torch.tensor([t], dtype=torch.float32).view(1, 1, 1, 1).to(device)
+            t_tensor = (
+                torch.tensor([t], dtype=torch.float32).view(1, 1, 1, 1).to(device)
+            )
             with autocast():
                 warped_A = warp_frames(A, flow_forward * t_tensor)
                 warped_B = warp_frames(B, flow_backward * (1 - t_tensor))
@@ -92,12 +120,17 @@ def interpolate(model_FC, A, B, input_fps, output_fps)-> list[torch.Tensor]:
         generated_frames.append(interpolated_frame)
     return generated_frames
 
+
 def warp_frames(frame, flow):
     b, c, h, w = frame.size()
-    i,j,flow_h, flow_w = flow.size()
+    i, j, flow_h, flow_w = flow.size()
    if h != flow_h or w != flow_w:
-        frame = F.interpolate(frame, size=(flow_h, flow_w), mode='bilinear', align_corners=True)
-    grid_y, grid_x = torch.meshgrid(torch.arange(0, flow_h), torch.arange(0, flow_w), indexing="ij")
+        frame = F.interpolate(
+            frame, size=(flow_h, flow_w), mode="bilinear", align_corners=True
+        )
+    grid_y, grid_x = torch.meshgrid(
+        torch.arange(0, flow_h), torch.arange(0, flow_w), indexing="ij"
+    )
     grid_x = grid_x.float().to(device)
     grid_y = grid_y.float().to(device)
     flow_x = flow[:, 0, :, :]
@@ -108,39 +141,99 @@ def warp_frames(frame, flow):
     y = 2.0 * y / (flow_h - 1) - 1.0
     grid = torch.stack((x, y), dim=-1)
 
-    warped_frame = F.grid_sample(frame, grid, align_corners=True,mode='bilinear', padding_mode='border')
+    warped_frame = F.grid_sample(
+        frame, grid, align_corners=True, mode="bilinear", padding_mode="border"
+    )
     return warped_frame
-def frames_to_video(frame_dir,output_video,fps):
+
+
+def frames_to_video(frame_dir, output_video, fps):
     frame_files = sorted(
-        [f for f in os.listdir(frame_dir) if f.endswith('.png')],
-        key=lambda x: int(os.path.splitext(x)[0].split('_')[-1])
+        [f for f in os.listdir(frame_dir) if f.endswith(".png")],
+        key=lambda x: int(os.path.splitext(x)[0].split("_")[-1]),
    )
     print(frame_files)
     for i, frame in enumerate(frame_files):
-        os.rename(os.path.join(frame_dir, frame), os.path.join(frame_dir, f"frame_{i}.png"))
+        os.rename(
+            os.path.join(frame_dir, frame), os.path.join(frame_dir, f"frame_{i}.png")
+        )
     frame_pattern = os.path.join(frame_dir, "frame_%d.png")
-    subprocess.run([  # run shell command
-        "ffmpeg", "-framerate", str(fps), "-i", frame_pattern,
-        "-c:v", "libx264", "-pix_fmt", "yuv420p", output_video
-    ],check=True)
-def solve():
-    checkpoint = torch.load("SuperSloMo.ckpt")
-    model_FC = UNet(6, 4).to(device)  # Initialize flow computation model
-    model_FC.load_state_dict(checkpoint["state_dictFC"])  # Load weights
+    subprocess.run(
+        [  # run shell command
+            "ffmpeg",
+            "-framerate",
+            str(fps),
+            "-i",
+            frame_pattern,
+            "-c:v",
+            "libx264",
+            "-pix_fmt",
+            "yuv420p",
+            output_video,
+        ],
+        check=True,
+    )
+
+
+# def solve():
+#     checkpoint = torch.load("SuperSloMo.ckpt")
+#     model_FC = UNet(6, 4).to(device)  # Initialize flow computation model
+#     model_FC.load_state_dict(checkpoint["state_dictFC"])  # Load weights
+#     model_FC.eval()
+#     model_AT = UNet(20, 5).to(device)  # Initialize auxiliary task model
+#     model_AT.load_state_dict(checkpoint["state_dictAT"], strict=False)  # Load weights
+#     model_AT.eval()
+#     frames_dir = "output"
+#     input_fps = 59
+#     output_fps = 120
+#     output_dir = "interpolated_frames2"
+#     interpolate_video(frames_dir, model_FC, input_fps, output_fps, output_dir)
+#     final_video = "result6.mp4"
+#     frames_to_video(output_dir, final_video, output_fps)
+
+
+# def main():
+#     solve()
+
+
+# if __name__ == "__main__":
+#     main()
+
+
+def process_video(video_path, output_fps):
+    # Ensure the output directory for frames exists
+    input_fps = extract_frames(video_path, "output_frames")
+
+    # Load model
+    model_FC = UNet(6, 4).to(device)
+    checkpoint = torch.load("SuperSloMo.ckpt", map_location=device)
+    model_FC.load_state_dict(checkpoint["state_dictFC"])
     model_FC.eval()
-    model_AT = UNet(20, 5).to(device)  # Initialize auxiliary task model
-    model_AT.load_state_dict(checkpoint["state_dictAT"], strict=False)  # Load weights
-    model_AT.eval()
-    frames_dir="output"
-    input_fps=59
-    output_fps=120
-    output_dir="interpolated_frames2"
-    interpolate_video(frames_dir,model_FC,input_fps,output_fps,output_dir)
-    final_video="result6.mp4"
-    frames_to_video(output_dir,final_video,output_fps)
-
-def main():
-    solve()
+
+    # Interpolate video
+    output_dir = "interpolated_frames"
+    interpolate_video("output_frames", model_FC, input_fps, output_fps, output_dir)
+
+    # Generate output video
+    final_video_path = "result.mp4"
+    frames_to_video(output_dir, final_video_path, output_fps)
+
+    return final_video_path  # Return the output video file path
+
+
+interface = gr.Interface(
+    fn=process_video,
+    inputs=[
+        gr.Video(label="Upload Input Video"),  # No 'type' argument required
+        gr.Slider(
+            minimum=30, maximum=120, step=1, value=60, label="Desired Output FPS"
+        ),
+    ],
+    outputs=gr.Video(label="Output Interpolated Video"),
+    title="Video Frame Interpolation with SuperSloMo",
+    description="This application allows you to input a video and increase its frame rate by interpolation using a deep learning model.",
+)
+
 
 if __name__ == "__main__":
-    main()
+    interface.launch()  # Starts the Gradio interface
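The new main.py imports extract_frames from a frames module that is not among the 50 files shown in this view. From the call site (input_fps = extract_frames(video_path, "output_frames")) it evidently decodes the uploaded video into output_frames/frame_N.png images and returns the source frame rate. A minimal sketch of such a helper, built on the cv2 dependency main.py already imports (the body below is an assumption, not the repository's actual frames.py):

    import os

    import cv2


    def extract_frames(video_path, frame_dir) -> int:
        """Decode video_path into frame_dir/frame_N.png and return its FPS."""
        os.makedirs(frame_dir, exist_ok=True)
        capture = cv2.VideoCapture(video_path)
        # Round to int so time_steps' integer arithmetic (// and range) works.
        fps = int(round(capture.get(cv2.CAP_PROP_FPS)))
        count = 0
        while True:
            ok, frame = capture.read()
            if not ok:  # no frames left to decode
                break
            cv2.imwrite(os.path.join(frame_dir, f"frame_{count}.png"), frame)
            count += 1
        capture.release()
        return fps

Whatever frame rate this returns then drives time_steps: for a 30 fps input and the slider's 120 fps target, k = 120 // 30 = 4, so three intermediate instants t = 0.25, 0.5, 0.75 are generated between each pair of consecutive frames.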
output_frames/frame_0.png ADDED
output_frames/frame_1.png ADDED
output_frames/frame_10.png ADDED
output_frames/frame_100.png ADDED
output_frames/frame_101.png ADDED
output_frames/frame_102.png ADDED
output_frames/frame_103.png ADDED
output_frames/frame_104.png ADDED
output_frames/frame_105.png ADDED
output_frames/frame_106.png ADDED
output_frames/frame_107.png ADDED
output_frames/frame_108.png ADDED
output_frames/frame_109.png ADDED
output_frames/frame_11.png ADDED
output_frames/frame_110.png ADDED
output_frames/frame_111.png ADDED
output_frames/frame_112.png ADDED
output_frames/frame_113.png ADDED
output_frames/frame_114.png ADDED
output_frames/frame_115.png ADDED
output_frames/frame_116.png ADDED
output_frames/frame_117.png ADDED
output_frames/frame_118.png ADDED
output_frames/frame_119.png ADDED
output_frames/frame_12.png ADDED
output_frames/frame_120.png ADDED
output_frames/frame_13.png ADDED
output_frames/frame_14.png ADDED
output_frames/frame_15.png ADDED
output_frames/frame_16.png ADDED
output_frames/frame_17.png ADDED
output_frames/frame_18.png ADDED
output_frames/frame_19.png ADDED
output_frames/frame_2.png ADDED
output_frames/frame_20.png ADDED
output_frames/frame_21.png ADDED
output_frames/frame_22.png ADDED
output_frames/frame_23.png ADDED
output_frames/frame_24.png ADDED
output_frames/frame_25.png ADDED
output_frames/frame_26.png ADDED
output_frames/frame_27.png ADDED
output_frames/frame_28.png ADDED
output_frames/frame_29.png ADDED
output_frames/frame_3.png ADDED
output_frames/frame_30.png ADDED
output_frames/frame_31.png ADDED
output_frames/frame_32.png ADDED