Spaces:
Running
Running
File size: 7,105 Bytes
344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 a612409 344feb9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import os
import gc
import cv2
import gradio as gr
import numpy as np
import matplotlib.cm as cm
import matplotlib # New import for the updated colormap API
import subprocess
import sys
import spaces
from utils.dc_utils import read_video_frames, save_video
# Markdown strings rendered at the top of the page by construct_demo().
# NOTE(review): "#RGBD" has no space after '#'; CommonMark-style renderers
# only treat "# text" as a heading — confirm this is intended.
title = "#RGBD sbs output"
# Short blurb with links to the upstream Video Depth Anything resources.
description = """**Video Depth Anything** + RGBD sbs output for viewing with Looking Glass Factory displays.
Please refer to our [paper](https://arxiv.org/abs/2501.12375), [project page](https://videodepthanything.github.io/), and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."""
def _render_depth_vis(depth_norm, cmap, grayscale, convert_from_color):
    """Turn a 2-D uint8 depth map into a 3-channel uint8 image."""
    if grayscale and not convert_from_color:
        # Use the normalized depth values directly as gray levels.
        return np.stack([depth_norm] * 3, axis=-1)
    # Colorize with the colormap (alpha channel dropped).
    depth_color = (cmap(depth_norm / 255.0)[..., :3] * 255).astype(np.uint8)
    if not grayscale:
        return depth_color
    # Grayscale derived from the colorized image rather than raw depth.
    depth_gray = cv2.cvtColor(depth_color, cv2.COLOR_RGB2GRAY)
    return np.stack([depth_gray] * 3, axis=-1)


def _mux_source_audio(video_path, audio_source_path, temp_path):
    """Best-effort copy of the first audio track of audio_source_path into video_path.

    ffmpeg writes to temp_path, which then replaces video_path. On any failure
    video_path is left untouched (silent video) and False is returned.
    """
    cmd = [
        "ffmpeg",
        "-y",
        "-i", video_path,
        "-i", audio_source_path,
        "-c:v", "copy",
        "-c:a", "aac",
        "-map", "0:v:0",
        "-map", "1:a:0?",  # '?' makes the audio stream optional
        "-shortest",
        temp_path,
    ]
    try:
        result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError as exc:
        # Raised e.g. when the ffmpeg executable cannot be started.
        print(f"ffmpeg could not be run, keeping video without audio: {exc}", file=sys.stderr)
        return False
    if result.returncode != 0 or not os.path.exists(temp_path):
        if os.path.exists(temp_path):
            os.remove(temp_path)  # drop a partial output file
        tail = result.stderr.decode(errors="replace")[-500:]
        print(f"ffmpeg audio merge failed, keeping video without audio: {tail}", file=sys.stderr)
        return False
    os.replace(temp_path, video_path)
    return True


@spaces.GPU(enable_queue=True)
def stitch_rgbd_videos(
    processed_video: str,
    depth_vis_video: str,
    max_len: int = -1,
    target_fps: int = -1,
    max_res: int = 1280,
    stitch: bool = True,
    grayscale: bool = True,
    convert_from_color: bool = True,
    blur: float = 0.3,
    output_dir: str = './outputs',
    input_size: int = 518,
):
    """Build a side-by-side RGB | depth video and return its path.

    Args:
        processed_video: Path of the RGB input video (also the audio source).
        depth_vis_video: Path of the depth video placed on the right half.
        max_len: Forwarded to read_video_frames as the frame limit.
        target_fps: Forwarded to read_video_frames; the fps it returns is
            used for the remaining reads and for the saved video.
        max_res: Only used for the initial read that resolves the fps.
        stitch: When False nothing is written and ``[None]`` is returned.
        grayscale: Emit the depth half as gray instead of inferno colors.
        convert_from_color: With ``grayscale``, derive gray from the
            inferno-colored image instead of the raw normalized depth.
        blur: Gaussian blur strength for the depth half; 0 disables it.
        output_dir: Directory for the output file (created if missing).
        input_size: Unused here; kept for interface compatibility.

    Returns:
        A one-element list holding the stitched video path, or ``None``
        when ``stitch`` is False.

    Raises:
        gr.Error: If a required video path is missing.
    """
    # Gradio passes None for an empty gr.Video input; fail with a clear message.
    if not processed_video:
        raise gr.Error("Please provide an input video.")
    if stitch and not depth_vis_video:
        raise gr.Error("Please provide a depth video to stitch.")

    # Only the resolved fps is needed from this read; the frames are not kept.
    target_fps = read_video_frames(processed_video, max_len, target_fps, max_res)[1]
    video_name = os.path.basename(processed_video)
    os.makedirs(output_dir, exist_ok=True)

    if not stitch:
        return [None]

    # Read both videos without downscaling (max_res=-1).
    full_frames, _ = read_video_frames(processed_video, max_len, target_fps, max_res=-1)
    depths, _ = read_video_frames(depth_vis_video, max_len, target_fps, max_res=-1)

    # Global range so every frame is normalized on the same scale.
    d_min = float(depths.min())
    d_range = float(depths.max()) - d_min

    # Loop invariants.
    cmap = matplotlib.colormaps.get_cmap("inferno")
    kernel_size = int(blur * 20) * 2 + 1 if blur > 0 else None  # always odd

    stitched_frames = []
    # zip stops at the shorter of the two sequences.
    for rgb_full, depth_frame in zip(full_frames, depths):
        # NOTE(review): read_video_frames is not visible here; if it yields
        # (H, W, C) frames for the depth video, collapse the channel axis so
        # the steps below operate on a single-channel map — confirm.
        if depth_frame.ndim == 3:
            depth_frame = depth_frame.mean(axis=-1)

        # Normalize to [0, 255]; a constant depth video maps to all zeros
        # instead of dividing by zero.
        if d_range > 0:
            depth_norm = ((depth_frame - d_min) / d_range * 255).astype(np.uint8)
        else:
            depth_norm = np.zeros(depth_frame.shape, dtype=np.uint8)

        depth_vis = _render_depth_vis(depth_norm, cmap, grayscale, convert_from_color)
        if kernel_size is not None:
            depth_vis = cv2.GaussianBlur(depth_vis, (kernel_size, kernel_size), 0)

        # Match the RGB frame size, then place RGB left and depth right.
        frame_h, frame_w = rgb_full.shape[:2]
        depth_vis = cv2.resize(depth_vis, (frame_w, frame_h))
        stitched_frames.append(cv2.hconcat([rgb_full, depth_vis]))
    stitched_frames = np.array(stitched_frames)

    # Output name: first 20 characters of the input base name + '_RGBD.mp4'.
    short_name = os.path.splitext(video_name)[0][:20]
    stitched_video_path = os.path.join(output_dir, short_name + '_RGBD.mp4')
    temp_audio_path = os.path.join(output_dir, short_name + '_RGBD_audio.mp4')
    save_video(stitched_frames, stitched_video_path, fps=target_fps)

    # Audio is optional: on failure the silent stitched video is still returned.
    _mux_source_audio(stitched_video_path, processed_video, temp_audio_path)
    return [stitched_video_path]
def construct_demo():
    """Assemble the Gradio Blocks UI and return it without launching.

    The page shows the module-level title/description, two video inputs,
    one output video, a collapsed settings accordion and a "Generate"
    button wired to stitch_rgbd_videos.
    """
    # NOTE(review): the widget nesting below was reconstructed from a copy of
    # this file whose indentation was lost — confirm the layout.
    with gr.Blocks(analytics_enabled=False) as demo:
        # Page header.
        gr.Markdown(title)
        gr.Markdown(description)
        gr.Markdown("### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!")

        # Top row: uploads on the left, result on the right.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                rgb_input = gr.Video(label="Input Video")
                depth_input = gr.Video(label="Generated Depth Video")
            with gr.Column(scale=2):
                with gr.Row(equal_height=True):
                    rgbd_output = gr.Video(
                        label="Stitched RGBD Video",
                        interactive=False,
                        autoplay=True,
                        loop=True,
                        show_share_button=True,
                        scale=5,
                    )

        # Bottom row: settings and the trigger button.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                with gr.Accordion("Advanced Settings", open=False):
                    max_len_slider = gr.Slider(label="Max process length", minimum=-1, maximum=1000, value=-1, step=1)
                    fps_slider = gr.Slider(label="Target FPS", minimum=-1, maximum=30, value=-1, step=1)
                    max_res_slider = gr.Slider(label="Max side resolution", minimum=480, maximum=1920, value=1280, step=1)
                    stitch_box = gr.Checkbox(label="Stitch RGB & Depth Videos", value=True)
                    grayscale_box = gr.Checkbox(label="Output Depth as Grayscale", value=True)
                    from_color_box = gr.Checkbox(label="Convert Grayscale from Color", value=True)
                    blur_strength = gr.Slider(minimum=0, maximum=1, step=0.01, label="Depth Blur (can reduce edge artifacts on display)", value=0.3)
                run_button = gr.Button("Generate")
            with gr.Column(scale=2):
                pass

        # Order must match the positional parameters of stitch_rgbd_videos.
        handler_inputs = [
            rgb_input,
            depth_input,
            max_len_slider,
            fps_slider,
            max_res_slider,
            stitch_box,
            grayscale_box,
            from_color_box,
            blur_strength,
        ]
        run_button.click(
            fn=stitch_rgbd_videos,
            inputs=handler_inputs,
            outputs=[rgbd_output],
        )
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, enable the request queue
    # (max_size=2) and start the server.
    demo = construct_demo()
    demo.queue(max_size=2).launch()