import os
import tempfile

import cv2
import ffmpeg
import gradio as gr
import numpy as np
from tqdm import tqdm


def extract_frames(video_path):
    """
    Extracts all frames from the input video.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frames.append(frame)
    cap.release()
    print(f"Extracted {len(frames)} frames from {video_path}")
    return frames


def apply_style_propagation(frames, style_image_path,
                            enable_temporal_reset=True,
                            enable_median_filtering=True,
                            enable_patch_based=True,
                            enable_sharpening=True):
    """
    Applies the style from the provided keyframe image to every frame using
    optical flow, with additional corrections controlled by boolean flags:
      - Temporal Reset/Re-anchoring (if enabled)
      - Median filtering of the flow (if enabled)
      - Patch-based correction for extreme flow (if enabled)
      - Sharpening after warping (if enabled)
    """
    # Load and resize the style image to match the video dimensions.
    style_image = cv2.imread(style_image_path)
    if style_image is None:
        raise ValueError(f"Failed to load style image from {style_image_path}")
    h, w = frames[0].shape[:2]
    style_image = cv2.resize(style_image, (w, h))

    # Keep a copy for temporal re-anchoring.
    original_styled = style_image.copy()
    styled_frames = [style_image]
    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)

    # Parameters for corrections:
    reset_interval = 30   # Every 30 frames, blend with the original style.
    block_size = 16       # Size of block for patch matching.
    patch_threshold = 10  # Threshold for mean flow magnitude in a block.
    search_margin = 10    # Margin around a block for patch matching.

    for i in tqdm(range(1, len(frames)), desc="Propagating style"):
        curr_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
        # Compute backward flow (current -> previous) so that remap() below
        # can pull each pixel of the new frame from the previous styled frame.
        flow = cv2.calcOpticalFlowFarneback(
            curr_gray, prev_gray, None,
            pyr_scale=0.5, levels=3, winsize=15,
            iterations=3, poly_n=5, poly_sigma=1.2, flags=0
        )

        # --- Method 3: Median Filtering of the Flow ---
        if enable_median_filtering:
            # cv2.split yields contiguous single-channel arrays, which
            # medianBlur requires.
            flow_x, flow_y = cv2.split(flow)
            flow_x_filtered = cv2.medianBlur(flow_x, 3)
            flow_y_filtered = cv2.medianBlur(flow_y, 3)
            flow_filtered = np.dstack((flow_x_filtered, flow_y_filtered))
        else:
            flow_filtered = flow

        # --- Method 4: Patch-Based Correction for Extreme Flow ---
        if enable_patch_based:
            flow_corrected = flow_filtered.copy()
            for by in range(0, h, block_size):
                for bx in range(0, w, block_size):
                    # Define the block region (handle edges).
                    y1, y2 = by, min(by + block_size, h)
                    x1, x2 = bx, min(bx + block_size, w)
                    block_flow = flow_filtered[y1:y2, x1:x2]
                    mag = np.sqrt(block_flow[..., 0]**2 + block_flow[..., 1]**2)
                    mean_mag = np.mean(mag)
                    if mean_mag > patch_threshold:
                        # Re-estimate this block's flow via patch matching:
                        # locate the current-frame block in the previous frame.
                        patch = curr_gray[y1:y2, x1:x2]
                        sx1 = max(x1 - search_margin, 0)
                        sy1 = max(y1 - search_margin, 0)
                        sx2 = min(x2 + search_margin, w)
                        sy2 = min(y2 + search_margin, h)
                        search_region = prev_gray[sy1:sy2, sx1:sx2]
                        if (search_region.shape[0] < patch.shape[0]
                                or search_region.shape[1] < patch.shape[1]):
                            continue
                        res = cv2.matchTemplate(search_region, patch, cv2.TM_SQDIFF_NORMED)
                        _, _, min_loc, _ = cv2.minMaxLoc(res)
                        best_x = sx1 + min_loc[0]
                        best_y = sy1 + min_loc[1]
                        # Displacement from the block to its best match in the
                        # previous frame.
                        offset_x = best_x - x1
                        offset_y = best_y - y1
                        flow_corrected[y1:y2, x1:x2, 0] = offset_x
                        flow_corrected[y1:y2, x1:x2, 1] = offset_y
        else:
            flow_corrected = flow_filtered
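
        # cv2.remap() performs a backward warp: each output pixel (x, y) is
        # sampled from the source image at (map_x[y, x], map_y[y, x]), so
        # adding the current->previous flow to the pixel grid pulls every
        # output pixel from its origin in the previous styled frame.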
        # Compute mapping coordinates.
        grid_x, grid_y = np.meshgrid(np.arange(w), np.arange(h))
        map_x = grid_x + flow_corrected[..., 0]
        map_y = grid_y + flow_corrected[..., 1]
        map_x = np.clip(map_x, 0, w - 1).astype(np.float32)
        map_y = np.clip(map_y, 0, h - 1).astype(np.float32)

        # Warp the previous styled frame.
        warped_styled = cv2.remap(styled_frames[-1], map_x, map_y,
                                  interpolation=cv2.INTER_LINEAR)

        # --- Method 2: Temporal Reset/Re-anchoring ---
        if enable_temporal_reset and (i % reset_interval == 0):
            warped_styled = cv2.addWeighted(warped_styled, 0.7, original_styled, 0.3, 0)

        # --- Method 5: Sharpening Post-Warping ---
        if enable_sharpening:
            kernel = np.array([[0, -1, 0],
                               [-1, 5, -1],
                               [0, -1, 0]], dtype=np.float32)
            warped_styled = cv2.filter2D(warped_styled, -1, kernel)

        styled_frames.append(warped_styled)
        prev_gray = curr_gray

    print(f"Propagated style to {len(styled_frames)} frames.")
    sample_frame = styled_frames[len(styled_frames) // 2]
    print(f"Sample styled frame mean intensity: {np.mean(sample_frame):.2f}")
    return styled_frames


def save_video_cv2(frames, output_path, fps=30):
    """
    Saves a list of frames as a video using OpenCV.
    """
    h, w, _ = frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
    for frame in frames:
        writer.write(frame)
    writer.release()
    size = os.path.getsize(output_path)
    print(f"Intermediate video saved to {output_path} (size: {size} bytes)")


def process_video(video_file, style_image_file, fps=30,
                  target_width=0, target_height=0,
                  enable_temporal_reset=True,
                  enable_median_filtering=True,
                  enable_patch_based=True,
                  enable_sharpening=True):
    """
    Processes the input video by applying the style image via optical-flow
    propagation, with optional corrections (temporal reset, median filtering,
    patch-based correction, sharpening). Optionally downscales the video (and,
    implicitly, the style image) to the specified resolution, then re-encodes
    the result with FFmpeg for web compatibility.

    Parameters:
      - video_file: The input video file.
      - style_image_file: The stylized keyframe image.
      - fps: Output frames per second.
      - target_width: Target width for downscaling (0 for original).
      - target_height: Target height for downscaling (0 for original).
      - enable_temporal_reset: Boolean flag for temporal reset.
      - enable_median_filtering: Boolean flag for median filtering of flow.
      - enable_patch_based: Boolean flag for patch-based correction.
      - enable_sharpening: Boolean flag for sharpening post-warp.

    Returns:
      - Path to the final output video, or an error message string.
    """
    # Get the video file path.
    video_path = video_file if isinstance(video_file, str) else video_file["name"]

    # Process the style image input.
    if isinstance(style_image_file, str):
        style_image_path = style_image_file
    elif isinstance(style_image_file, dict) and "name" in style_image_file:
        style_image_path = style_image_file["name"]
    elif isinstance(style_image_file, np.ndarray):
        # Gradio delivers images as RGB arrays; convert to BGR for OpenCV.
        tmp_style = os.path.join(tempfile.gettempdir(), "temp_style_image.jpeg")
        cv2.imwrite(tmp_style, cv2.cvtColor(style_image_file, cv2.COLOR_RGB2BGR))
        style_image_path = tmp_style
    else:
        return "Error: Unsupported style image format."

    # Extract frames from the video.
    frames = extract_frames(video_path)
    if not frames:
        return "Error: No frames extracted from the video."

    original_h, original_w = frames[0].shape[:2]
    print(f"Original video resolution: {original_w}x{original_h}")
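
    # Note: target width and height are applied independently, so values that
    # do not preserve the source aspect ratio will stretch the frames.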
    # Downscale if target dimensions are provided.
    if target_width > 0 and target_height > 0:
        print(f"Downscaling frames to resolution: {target_width}x{target_height}")
        frames = [cv2.resize(frame, (target_width, target_height)) for frame in frames]
    else:
        print("No downscaling applied. Using original resolution.")

    # Propagate style with the selected corrections.
    styled_frames = apply_style_propagation(
        frames, style_image_path,
        enable_temporal_reset=enable_temporal_reset,
        enable_median_filtering=enable_median_filtering,
        enable_patch_based=enable_patch_based,
        enable_sharpening=enable_sharpening
    )

    # Save an intermediate video with OpenCV to a named temporary file.
    temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    temp_video_file.close()
    temp_video_path = temp_video_file.name
    save_video_cv2(styled_frames, temp_video_path, fps=fps)

    # Re-encode the video with FFmpeg for browser compatibility.
    output_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    output_video_file.close()
    output_video_path = output_video_file.name
    try:
        (
            ffmpeg
            .input(temp_video_path)
            .output(output_video_path, vcodec='libx264', pix_fmt='yuv420p', r=fps)
            .run(overwrite_output=True, quiet=True)
        )
    except ffmpeg.Error as e:
        print("FFmpeg error:", e.stderr.decode() if e.stderr else e)
        return "Error during video re-encoding."

    final_size = os.path.getsize(output_video_path)
    print(f"Output video saved to {output_video_path} (size: {final_size} bytes)")
    if final_size == 0:
        return "Error: Output video file is empty."

    # Clean up the intermediate file.
    os.remove(temp_video_path)
    return output_video_path


iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(label="Input Video (v.mp4)"),
        gr.Image(label="Stylized Keyframe (a.jpeg)"),
        gr.Slider(minimum=1, maximum=60, step=1, value=30, label="Output FPS"),
        gr.Slider(minimum=0, maximum=1920, step=1, value=0, label="Target Width (0 for original)"),
        gr.Slider(minimum=0, maximum=1080, step=1, value=0, label="Target Height (0 for original)"),
        gr.Checkbox(label="Enable Temporal Reset", value=True),
        gr.Checkbox(label="Enable Median Filtering", value=True),
        gr.Checkbox(label="Enable Patch-Based Correction", value=True),
        gr.Checkbox(label="Enable Sharpening", value=True)
    ],
    outputs=gr.Video(label="Styled Video"),
    title="Optical Flow Style Propagation with Corrections",
    description=(
        "Upload a video and a stylized keyframe image. Optionally downscale to a target resolution.\n"
        "You can enable/disable the following corrections:\n"
        "• Temporal Reset/Re-anchoring\n"
        "• Median Filtering of Flow\n"
        "• Patch-Based Correction for Extreme Flow\n"
        "• Sharpening Post-Warping\n"
        "The output video is re-encoded for web compatibility."
    )
)

if __name__ == "__main__":
    iface.launch(share=True)
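
# Example (headless sketch): the pipeline can also be driven without the
# Gradio UI by calling process_video() directly. The paths below are
# placeholders, not files shipped with this script.
#
#   output_path = process_video(
#       "input.mp4", "style.jpeg",
#       fps=24, target_width=640, target_height=360,
#   )
#   print("Styled video written to:", output_path)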