Spaces:

SpyC0der77
/

AI-Video-Stabilization

Running

App Files Files Community

SpyC0der77 committed on Mar 15

Commit

8653b6e

verified ·

1 Parent(s): 89bc003

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -45

app.py CHANGED Viewed

@@ -6,37 +6,33 @@ import torch
 import tempfile
 import os
 import gradio as gr
 # Set up device for torch
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
 # Try to load the RAFT model from torch.hub.
-# If it fails (e.g. due to repository structure changes), we will fall back to OpenCV optical flow.
 try:
-    # The trust_repo parameter might prompt for confirmation; set it to True.
     raft_model = torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True, trust_repo=True)
     raft_model = raft_model.to(device)
     raft_model.eval()
-    print("RAFT model loaded successfully.")
 except Exception as e:
-    print("Error loading RAFT model:", e)
-    print("Falling back to OpenCV optical flow for motion CSV generation.")
     raft_model = None
 def generate_motion_csv(video_file, output_csv=None):
     """
     Generates a CSV file with motion data (columns: frame, mag, ang, zoom) from an input video.
-    If the RAFT model is available, it uses it to compute optical flow; otherwise, it falls back to
-    OpenCV's Farneback optical flow.
-    Args:
-        video_file (str): Path to the input video.
-        output_csv (str): Optional output CSV file path. If None, a temporary file is created.
-    Returns:
-        output_csv (str): Path to the generated CSV file.
     """
     if output_csv is None:
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
         output_csv = temp_file.name
@@ -44,8 +40,9 @@ def generate_motion_csv(video_file, output_csv=None):
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
-        raise ValueError("Could not open video file for CSV generation.")
     with open(output_csv, 'w', newline='') as csvfile:
         fieldnames = ['frame', 'mag', 'ang', 'zoom']
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
@@ -53,17 +50,20 @@ def generate_motion_csv(video_file, output_csv=None):
         ret, first_frame = cap.read()
         if not ret:
-            raise ValueError("Cannot read first frame from video.")
         if raft_model is not None:
-            # Convert the first frame to RGB and then to a torch tensor.
             first_frame_rgb = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
             prev_tensor = torch.from_numpy(first_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
             prev_tensor = prev_tensor.to(device)
         else:
             prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
         frame_idx = 1
         while True:
             ret, frame = cap.read()
             if not ret:
@@ -105,20 +105,24 @@ def generate_motion_csv(video_file, output_csv=None):
                 'zoom': zoom_factor
             })
             frame_idx += 1
     cap.release()
-    print(f"Motion CSV generated: {output_csv}")
     return output_csv
 def read_motion_csv(csv_filename):
     """
     Reads a motion CSV file (with columns: frame, mag, ang, zoom) and computes a cumulative
-    offset per frame (the negative cumulative displacement) for stabilization.
     Returns:
         A dictionary mapping frame numbers to (dx, dy) offsets.
     """
     motion_data = {}
     cumulative_dx = 0.0
     cumulative_dy = 0.0
@@ -134,30 +138,25 @@ def read_motion_csv(csv_filename):
             cumulative_dx += dx
             cumulative_dy += dy
             motion_data[frame_num] = (-cumulative_dx, -cumulative_dy)
     return motion_data
 def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, output_file=None):
     """
     Stabilizes the input video using motion data from the CSV file.
-    Args:
-        video_file (str): Path to the input video.
-        csv_file (str): Path to the motion CSV file.
-        zoom (float): Zoom factor to apply before stabilization (default: 1.0).
-        output_file (str): Path for the output stabilized video. If None, a temporary file is created.
-    Returns:
-        output_file (str): Path to the stabilized video file.
     """
     motion_data = read_motion_csv(csv_file)
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
-        raise ValueError("Could not open video file for stabilization.")
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
     if output_file is None:
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
@@ -168,11 +167,14 @@ def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, output_file=None):
     out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
     frame_idx = 1
     while True:
         ret, frame = cap.read()
         if not ret:
             break
         if zoom != 1.0:
             zoomed_frame = cv2.resize(frame, None, fx=zoom, fy=zoom, interpolation=cv2.INTER_LINEAR)
             zoomed_h, zoomed_w = zoomed_frame.shape[:2]
@@ -186,37 +188,43 @@ def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, output_file=None):
         stabilized_frame = cv2.warpAffine(frame, transform, (width, height))
         out.write(stabilized_frame)
         frame_idx += 1
     cap.release()
     out.release()
-    print(f"Stabilized video saved to: {output_file}")
     return output_file
 def process_video_ai(video_file, zoom):
     """
     Gradio interface function:
-      - Generates motion data (CSV) from the input video using an AI model (RAFT, if available).
       - Stabilizes the video based on the generated motion data.
     Returns:
-        Tuple containing the original video file path and the stabilized video file path.
     """
-    if isinstance(video_file, dict):
-        video_file = video_file.get("name", None)
-    if video_file is None:
-        raise ValueError("Please upload a video file.")
-    # Generate motion CSV using the AI model (or fallback) for optical flow.
-    csv_file = generate_motion_csv(video_file)
-    # Stabilize the video using the generated CSV.
-    stabilized_path = stabilize_video_using_csv(video_file, csv_file, zoom=zoom)
-    return video_file, stabilized_path
 # Build the Gradio UI.
 with gr.Blocks() as demo:
     gr.Markdown("# AI-Powered Video Stabilization")
-    gr.Markdown("Upload a video and select a zoom factor. The system will automatically generate motion data (video.flow.csv) using an AI model (RAFT, if available) and then stabilize the video.")
     with gr.Row():
         with gr.Column():
@@ -226,11 +234,12 @@ with gr.Blocks() as demo:
         with gr.Column():
             original_video = gr.Video(label="Original Video")
             stabilized_video = gr.Video(label="Stabilized Video")
     process_button.click(
         fn=process_video_ai,
         inputs=[video_input, zoom_slider],
-        outputs=[original_video, stabilized_video]
     )
 demo.launch()

 import tempfile
 import os
 import gradio as gr
+import time
+import io
+from contextlib import redirect_stdout
 # Module-level setup, run once at import: pick the torch device and try to load RAFT.
 # Use the GPU when CUDA is available, otherwise run on the CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print(f"[INFO] Using device: {device}")
 # Try to load the pretrained "raft_small" entry point of princeton-vl/RAFT via torch.hub.
+# If loading fails for any reason, we fall back to OpenCV Farneback optical flow.
 try:
+    print("[INFO] Attempting to load RAFT model from torch.hub...")
     # trust_repo=True avoids torch.hub's confirmation prompt for the repository.
     raft_model = torch.hub.load("princeton-vl/RAFT", "raft_small", pretrained=True, trust_repo=True)
     raft_model = raft_model.to(device)
     # Evaluation mode: the model is only used for inference in this app.
     raft_model.eval()
+    print("[INFO] RAFT model loaded successfully.")
 except Exception as e:
     # The broad catch is intentional: any load failure should degrade to the
     # OpenCV fallback instead of preventing the app from starting.
+    print("[ERROR] Error loading RAFT model:", e)
+    print("[INFO] Falling back to OpenCV Farneback optical flow.")
     # generate_motion_csv checks `raft_model is not None` to choose between
     # the RAFT path and the Farneback path.
     raft_model = None
 def generate_motion_csv(video_file, output_csv=None):
     """
     Generates a CSV file with motion data (columns: frame, mag, ang, zoom) from an input video.
+    Uses RAFT if available, otherwise falls back to OpenCV's Farneback optical flow.
     """
+    start_time = time.time()
     if output_csv is None:
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.csv')
         output_csv = temp_file.name
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
+        raise ValueError("[ERROR] Could not open video file for CSV generation.")
+    print(f"[INFO] Generating motion CSV for video: {video_file}")
     with open(output_csv, 'w', newline='') as csvfile:
         fieldnames = ['frame', 'mag', 'ang', 'zoom']
         writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
         ret, first_frame = cap.read()
         if not ret:
+            raise ValueError("[ERROR] Cannot read first frame from video.")
         if raft_model is not None:
             first_frame_rgb = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
             prev_tensor = torch.from_numpy(first_frame_rgb).permute(2, 0, 1).float().unsqueeze(0) / 255.0
             prev_tensor = prev_tensor.to(device)
+            print("[INFO] Using RAFT model for optical flow computation.")
         else:
             prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)
+            print("[INFO] Using OpenCV Farneback optical flow for computation.")
         frame_idx = 1
+        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        print(f"[INFO] Total frames to process: {total_frames}")
         while True:
             ret, frame = cap.read()
             if not ret:
                 'zoom': zoom_factor
             })
+            if frame_idx % 10 == 0 or frame_idx == total_frames:
+                print(f"[INFO] Processed frame {frame_idx}/{total_frames}")
             frame_idx += 1
     cap.release()
+    elapsed = time.time() - start_time
+    print(f"[INFO] Motion CSV generated: {output_csv} in {elapsed:.2f} seconds")
     return output_csv
 def read_motion_csv(csv_filename):
     """
     Reads a motion CSV file (with columns: frame, mag, ang, zoom) and computes a cumulative
+    offset per frame for stabilization.
     Returns:
         A dictionary mapping frame numbers to (dx, dy) offsets.
     """
+    print(f"[INFO] Reading motion CSV: {csv_filename}")
     motion_data = {}
     cumulative_dx = 0.0
     cumulative_dy = 0.0
             cumulative_dx += dx
             cumulative_dy += dy
             motion_data[frame_num] = (-cumulative_dx, -cumulative_dy)
+    print("[INFO] Completed reading motion CSV.")
     return motion_data
 def stabilize_video_using_csv(video_file, csv_file, zoom=1.0, output_file=None):
     """
     Stabilizes the input video using motion data from the CSV file.
     """
+    start_time = time.time()
+    print(f"[INFO] Starting stabilization using CSV: {csv_file}")
     motion_data = read_motion_csv(csv_file)
     cap = cv2.VideoCapture(video_file)
     if not cap.isOpened():
+        raise ValueError("[ERROR] Could not open video file for stabilization.")
     fps = cap.get(cv2.CAP_PROP_FPS)
     width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
     height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    print(f"[INFO] Video properties - FPS: {fps}, Width: {width}, Height: {height}")
     if output_file is None:
         temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
     out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
     frame_idx = 1
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    print(f"[INFO] Total frames to stabilize: {total_frames}")
     while True:
         ret, frame = cap.read()
         if not ret:
             break
+        # Optionally apply zoom (resize and center-crop)
         if zoom != 1.0:
             zoomed_frame = cv2.resize(frame, None, fx=zoom, fy=zoom, interpolation=cv2.INTER_LINEAR)
             zoomed_h, zoomed_w = zoomed_frame.shape[:2]
         stabilized_frame = cv2.warpAffine(frame, transform, (width, height))
         out.write(stabilized_frame)
+        if frame_idx % 10 == 0 or frame_idx == total_frames:
+            print(f"[INFO] Stabilized frame {frame_idx}/{total_frames}")
         frame_idx += 1
     cap.release()
     out.release()
+    elapsed = time.time() - start_time
+    print(f"[INFO] Stabilized video saved to: {output_file} in {elapsed:.2f} seconds")
     return output_file
 def process_video_ai(video_file, zoom):
     """
     Gradio interface function: stabilize an uploaded video and return its log.

     - Generates motion data (CSV) from the input video using an AI model
       (RAFT if available, else Farneback).
     - Stabilizes the video based on the generated motion data.
     - Captures everything printed while processing so the UI can show it.

     Args:
         video_file: Path to the uploaded video, or a dict carrying that path
             under the "name" key.
         zoom: Zoom factor forwarded to stabilize_video_using_csv.

     Returns:
         Tuple containing the original video file path, the stabilized video
         file path, and log output.

     Raises:
         ValueError: If no video file was provided. Errors raised by the
             processing steps propagate unchanged.
     """
     if isinstance(video_file, dict):
         video_file = video_file.get("name")
     # Reject None and empty paths before any work starts, so the caller gets
     # the upload message instead of a later "could not open" failure.
     if not video_file:
         raise ValueError("[ERROR] Please upload a video file.")

     log_buffer = io.StringIO()
     csv_file = None
     try:
         # NOTE: redirect_stdout replaces the process-wide sys.stdout, so output
         # printed by other requests running at the same time can land in this
         # buffer as well.
         with redirect_stdout(log_buffer):
             print("[INFO] Starting AI-powered video processing...")
             csv_file = generate_motion_csv(video_file)
             stabilized_path = stabilize_video_using_csv(video_file, csv_file, zoom=zoom)
             print("[INFO] Video processing complete.")
     except Exception:
         # stdout is restored once the with-block exits; replay the captured
         # log there so a failed run still leaves a trace on the console.
         print(log_buffer.getvalue(), end="")
         raise
     finally:
         # The motion CSV is an intermediate temp file created with
         # delete=False and is not returned to the caller, so remove it here.
         if csv_file is not None:
             try:
                 os.remove(csv_file)
             except OSError:
                 # Best-effort cleanup: a missing or locked temp file must not
                 # mask the real result or the real error.
                 pass
     return video_file, stabilized_path, log_buffer.getvalue()
 # Build the Gradio UI.
 with gr.Blocks() as demo:
     gr.Markdown("# AI-Powered Video Stabilization")
+    gr.Markdown("Upload a video and select a zoom factor. The system will generate motion data using an AI model (RAFT if available) and then stabilize the video.")
     with gr.Row():
         with gr.Column():
         with gr.Column():
             original_video = gr.Video(label="Original Video")
             stabilized_video = gr.Video(label="Stabilized Video")
+            logs_output = gr.Textbox(label="Logs", lines=10)
     process_button.click(
         fn=process_video_ai,
         inputs=[video_input, zoom_slider],
+        outputs=[original_video, stabilized_video, logs_output]
     )
 demo.launch()