Update app.py
app.py
CHANGED
@@ -2,18 +2,112 @@ import cv2
 import numpy as np
 import csv
 import math
+import torch
 import tempfile
 import os
 import gradio as gr
 
+# Load the RAFT model from torch.hub (uses the 'raft_small' variant)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"Using device: {device}")
+model = torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True)
+model = model.to(device)
+model.eval()
+
+def generate_motion_csv(video_file, output_csv=None):
+    """
+    Uses the RAFT model to compute optical flow between consecutive frames,
+    then writes a CSV file (with columns: frame, mag, ang, zoom) where:
+      - mag: median magnitude of the flow,
+      - ang: median angle (in degrees), and
+      - zoom: fraction of pixels moving away from the image center.
+
+    Args:
+        video_file (str): Path to the input video.
+        output_csv (str): Optional path for output CSV file. If None, a temporary file is used.
+
+    Returns:
+        output_csv (str): Path to the generated CSV file.
+    """
+    if output_csv is None:
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
+        output_csv = temp_file.name
+        temp_file.close()
+
+    cap = cv2.VideoCapture(video_file)
+    if not cap.isOpened():
+        raise ValueError("Could not open video file for CSV generation.")
+
+    # Prepare CSV file for writing
+    with open(output_csv, 'w', newline='') as csvfile:
+        fieldnames = ['frame', 'mag', 'ang', 'zoom']
+        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+        writer.writeheader()
+
+        ret, prev_frame = cap.read()
+        if not ret:
+            raise ValueError("Cannot read first frame from video.")
+
+        # Convert the first frame to tensor
+        prev_frame_rgb = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2RGB)
+        prev_tensor = torch.from_numpy(prev_frame_rgb).permute(2,0,1).float().unsqueeze(0) / 255.0
+        prev_tensor = prev_tensor.to(device)
+
+        frame_idx = 1
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                break
+
+            curr_frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            curr_tensor = torch.from_numpy(curr_frame_rgb).permute(2,0,1).float().unsqueeze(0) / 255.0
+            curr_tensor = curr_tensor.to(device)
+
+            # Use RAFT to compute optical flow between previous and current frame.
+            with torch.no_grad():
+                # The RAFT model returns a low-resolution flow and an upsampled (high-res) flow.
+                flow_low, flow_up = model(prev_tensor, curr_tensor, iters=20, test_mode=True)
+            # Convert flow to numpy array (shape: H x W x 2)
+            flow = flow_up[0].permute(1,2,0).cpu().numpy()
+
+            # Compute median magnitude and angle for the optical flow
+            mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1], angleInDegrees=True)
+            median_mag = np.median(mag)
+            median_ang = np.median(ang)
+
+            # Compute a "zoom factor": fraction of pixels moving away from the center.
+            h, w = flow.shape[:2]
+            center_x, center_y = w / 2, h / 2
+            x_coords, y_coords = np.meshgrid(np.arange(w), np.arange(h))
+            x_offset = x_coords - center_x
+            y_offset = y_coords - center_y
+            # Dot product between flow vectors and pixel offsets:
+            dot = flow[...,0] * x_offset + flow[...,1] * y_offset
+            zoom_factor = np.count_nonzero(dot > 0) / (w * h)
+
+            # Write the computed metrics to the CSV file.
+            writer.writerow({
+                'frame': frame_idx,
+                'mag': median_mag,
+                'ang': median_ang,
+                'zoom': zoom_factor
+            })
+
+            # Update for next iteration
+            prev_tensor = curr_tensor.clone()
+            frame_idx += 1
+
+    cap.release()
+    print(f"Motion CSV generated: {output_csv}")
+    return output_csv
+
 def read_motion_csv(csv_filename):
     """
-    Reads …
-    …
-    accumulates these to build a per-frame cumulative offset.
+    Reads the CSV file (columns: frame, mag, ang, zoom) and computes a cumulative
+    offset per frame to be used for stabilization.
 
     Returns:
-        A dictionary mapping frame numbers to (dx, dy) offsets.
+        A dictionary mapping frame numbers to (dx, dy) offsets (the negative cumulative displacement).
     """
     motion_data = {}
     cumulative_dx = 0.0
@@ -24,15 +118,13 @@ def read_motion_csv(csv_filename):
             frame_num = int(row['frame'])
             mag = float(row['mag'])
             ang = float(row['ang'])
-            # Convert angle (in degrees) to radians
+            # Convert angle (in degrees) to radians.
            rad = math.radians(ang)
-            # Compute displacement vector from magnitude and angle
            dx = mag * math.cos(rad)
            dy = mag * math.sin(rad)
-            # Accumulate the displacement over frames
            cumulative_dx += dx
            cumulative_dy += dy
-            # …
+            # Negative cumulative offset counteracts the detected motion.
            motion_data[frame_num] = (-cumulative_dx, -cumulative_dy)
    return motion_data
 
@@ -42,25 +134,24 @@ def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, output_file=None):
 
     Args:
         video_file (str): Path to the input video.
-        csv_file (str): Path to the CSV file …
-        zoom (float): …
+        csv_file (str): Path to the motion CSV file.
+        zoom (float): Zoom factor to apply before stabilization (default: 1.0, no zoom).
         output_file (str): Path for the output stabilized video. If None, a temporary file is created.
 
     Returns:
-        output_file (str): …
+        output_file (str): Path to the stabilized video file.
     """
     # Read motion data from CSV
     motion_data = read_motion_csv(csv_file)
 
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
-        raise ValueError("Could not open video file.")
+        raise ValueError("Could not open video file for stabilization.")
 
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
 
-    # Create a temporary file for output if not provided
     if output_file is None:
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
         output_file = temp_file.name
@@ -83,10 +174,10 @@ def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, output_file=None):
         start_y = max((zoomed_h - height) // 2, 0)
         frame = zoomed_frame[start_y:start_y+height, start_x:start_x+width]
 
-        # …
+        # Get the stabilization offset for the current frame (default to (0,0) if not available)
         dx, dy = motion_data.get(frame_num, (0, 0))
 
-        # Apply an affine transformation to counteract the motion
+        # Apply an affine transformation to counteract the motion.
         transform = np.array([[1, 0, dx],
                               [0, 1, dy]], dtype=np.float32)
         stabilized_frame = cv2.warpAffine(frame, transform, (width, height))
@@ -96,48 +187,47 @@ def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, output_file=None):
 
     cap.release()
     out.release()
+    print(f"Stabilized video saved to: {output_file}")
     return output_file
 
-def …
-    """
-    Gradio interface function to stabilize a video.
-    Accepts an input video file, a motion CSV file, and a zoom factor.
-    Returns the original video and the stabilized video.
-    """
+def process_video_ai(video_file, zoom):
+    """
+    Gradio interface function: Given an input video and a zoom factor,
+    it uses a deep learning model (RAFT) to generate motion data (video.flow.csv)
+    and then stabilizes the video based on that data.
+
+    Returns:
+        A tuple containing the original video file path and the stabilized video file path.
+    """
+    # Ensure the input is a file path (if provided as a dict, extract the "name")
     if isinstance(video_file, dict):
         video_file = video_file.get("name", None)
-    if isinstance(csv_file, dict):
-        csv_file = csv_file.get("name", None)
-
-    # Check that both file paths are available
     if video_file is None:
-        raise ValueError("…
-    if csv_file is None:
-        raise ValueError("CSV file path is missing. Please upload a CSV file.")
+        raise ValueError("Please upload a video file.")
 
+    # Generate motion CSV using AI-based optical flow (RAFT)
+    csv_file = generate_motion_csv(video_file)
+    # Stabilize the video using the generated CSV data
     stabilized_path = stabilize_video_using_csv(video_file, csv_file, zoom=zoom)
     return video_file, stabilized_path
 
+# Build the Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# Video Stabilization …
+    gr.Markdown("# AI-Powered Video Stabilization")
+    gr.Markdown("Upload a video and select a zoom factor. The system will automatically use a deep learning model (RAFT) to generate motion data and then stabilize the video.")
+
     with gr.Row():
         with gr.Column():
             video_input = gr.Video(label="Input Video")
-            csv_input = gr.File(label="Motion CSV File (e.g., video.flow.csv)", file_count="single")
             zoom_slider = gr.Slider(minimum=1.0, maximum=2.0, step=0.1, value=1.0, label="Zoom Factor")
+            process_button = gr.Button("Process Video")
        with gr.Column():
            original_video = gr.Video(label="Original Video")
            stabilized_video = gr.Video(label="Stabilized Video")
 
    process_button.click(
-        fn=…
-        inputs=[video_input, …
+        fn=process_video_ai,
+        inputs=[video_input, zoom_slider],
        outputs=[original_video, stabilized_video]
    )
 
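A caveat on the model-loading lines added in the first hunk: torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True) only works if that repository exposes a hubconf.py with a raft_small entry point, and the reference RAFT implementation normalizes [0, 255] inputs internally, so the tensors pre-divided by 255.0 here are worth double-checking against whatever model actually gets loaded. If hub loading proves unreliable, torchvision bundles RAFT with pretrained weights and a documented preprocessing transform. A minimal sketch under those assumptions (torchvision >= 0.13, frame height and width divisible by 8 as RAFT requires; the compute_flow helper is illustrative, not part of this commit):

import torch
from torchvision.models.optical_flow import raft_small, Raft_Small_Weights

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weights = Raft_Small_Weights.DEFAULT
model = raft_small(weights=weights).to(device).eval()
preprocess = weights.transforms()  # converts uint8 frames and normalizes for RAFT

def compute_flow(prev_rgb, curr_rgb):
    # prev_rgb / curr_rgb: HxWx3 uint8 RGB numpy arrays. RAFT needs H and W
    # divisible by 8, so pad or resize the frames beforehand if necessary.
    prev = torch.from_numpy(prev_rgb).permute(2, 0, 1).unsqueeze(0)
    curr = torch.from_numpy(curr_rgb).permute(2, 0, 1).unsqueeze(0)
    prev, curr = preprocess(prev, curr)
    with torch.no_grad():
        # torchvision's RAFT returns one flow per refinement step; take the last.
        flow = model(prev.to(device), curr.to(device))[-1]
    return flow[0].permute(1, 2, 0).cpu().numpy()  # HxWx2, in pixels

The returned array has the same H x W x 2 layout as the flow fed to cv2.cartToPolar in generate_motion_csv, so it would slot into that loop directly.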
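The sign convention in read_motion_csv is the subtle part of this change: it stores the negative running sum of the per-frame displacement vectors, and stabilize_video_using_csv feeds that offset straight into the warpAffine translation. A standalone toy check of the convention, mirroring the loop in the diff (note also that the zoom column written by generate_motion_csv is never consumed by the stabilization path in this commit):

import csv, io, math

# Two frames of pure rightward drift: mag = 2 px, ang = 0 degrees.
rows = io.StringIO("frame,mag,ang,zoom\n1,2.0,0.0,0.5\n2,2.0,0.0,0.5\n")
cum_dx = cum_dy = 0.0
for row in csv.DictReader(rows):
    rad = math.radians(float(row['ang']))
    cum_dx += float(row['mag']) * math.cos(rad)
    cum_dy += float(row['mag']) * math.sin(rad)
    print(row['frame'], (-cum_dx, -cum_dy))
# Prints: 1 (-2.0, -0.0) then 2 (-4.0, -0.0) -- each frame is translated
# further left to cancel the accumulated rightward camera drift.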
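One thing the visible hunks never show is how demo gets served; that may be handled by the hosting platform or by a part of app.py outside these hunks. If the file were run as a plain script, the usual Gradio entry point would be the following (an assumption about the surrounding file, not something this diff contains):

# Hypothetical entry point; not visible in the hunks above.
if __name__ == "__main__":
    demo.launch()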