svjack committed on
Commit
4f2eaa1
·
verified ·
1 Parent(s): 54ed1a2

Upload long_app.py

Browse files
Files changed (1) hide show
  1. long_app.py +207 -0
long_app.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gc
3
+ import numpy as np
4
+ import torch
5
+ import spaces
6
+ import gradio as gr
7
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
8
+ from video_depth_anything.video_depth import VideoDepthAnything
9
+ from utils.dc_utils import read_video_frames, save_video
10
+ from huggingface_hub import hf_hub_download
11
+
12
# Rows for the Gradio "Examples" gallery. Each row is
# [video path, max_len, target_fps, max_res]; every bundled clip uses the
# same settings (-1, -1, 1280).
examples = [
    [f'assets/example_videos/{clip_name}', -1, -1, 1280]
    for clip_name in (
        'davis_rollercoaster.mp4',
        'Tokyo-Walk_rgb.mp4',
        '4158877-uhd_3840_2160_30fps_rgb.mp4',
        '4511004-uhd_3840_2160_24fps_rgb.mp4',
        '1753029-hd_1920_1080_30fps.mp4',
        'davis_burnout.mp4',
        'example_5473765-l.mp4',
        'Istanbul-26920.mp4',
        'obj_1.mp4',
        'sheep_cut1.mp4',
    )
]
24
+
25
# Run on the GPU when one is visible, otherwise fall back to the CPU.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Constructor keyword arguments for each configured encoder variant.
model_configs = {
    'vits': {
        'encoder': 'vits',
        'features': 64,
        'out_channels': [48, 96, 192, 384],
    },
    'vitl': {
        'encoder': 'vitl',
        'features': 256,
        'out_channels': [256, 512, 1024, 1024],
    },
}

# Encoder key -> size suffix used in the Hugging Face repository name.
encoder2name = {
    'vits': 'Small',
    'vitl': 'Large',
}

# Active variant; set to 'vitl' to use the large model instead.
encoder = 'vits'
model_name = encoder2name[encoder]

# Build the network, download its checkpoint from the Hub, load the weights
# on the CPU, then move the model to DEVICE and switch it to eval mode.
video_depth_anything = VideoDepthAnything(**model_configs[encoder])
filepath = hf_hub_download(
    repo_id=f"depth-anything/Video-Depth-Anything-{model_name}",
    filename=f"video_depth_anything_{encoder}.pth",
    repo_type="model",
)
# NOTE(review): torch.load unpickles the downloaded file; consider passing
# weights_only=True if the installed torch version supports it.
video_depth_anything.load_state_dict(torch.load(filepath, map_location='cpu'))
video_depth_anything = video_depth_anything.to(DEVICE).eval()
45
+
46
# Markdown shown at the top of the demo page.
title = "# Video Depth Anything"
# The stray zero-width space that preceded the bold marker has been removed so
# the rendered text contains no invisible characters.
description = """Official demo for **Video Depth Anything**.
Please refer to our [paper](https://arxiv.org/abs/2501.12375), [project page](https://videodepthanything.github.io/), and [github](https://github.com/DepthAnything/Video-Depth-Anything) for more details."""
49
+
50
def _concatenate_video_files(paths, output_path):
    """Concatenate the video files in *paths* and encode them to *output_path*.

    Every clip opened here is closed again, even if encoding fails, so no
    reader is left holding the source files open.
    """
    opened_clips = []
    merged_clip = None
    try:
        for path in paths:
            opened_clips.append(VideoFileClip(path))
        merged_clip = concatenate_videoclips(opened_clips)
        merged_clip.write_videofile(output_path, codec='libx264')
    finally:
        if merged_clip is not None:
            merged_clip.close()
        for opened in opened_clips:
            opened.close()


@spaces.GPU(duration=240)
def infer_video_depth(
    input_video: str,
    max_len: int = -1,
    target_fps: int = -1,
    max_res: int = 1280,
    grayscale: bool = False,
    output_dir: str = './outputs',
    input_size: int = 518,
):
    """Estimate depth for a long video by processing it in short segments.

    The input is cut into segments of at most 45 source frames, each segment
    is run through the model independently, and the per-segment depth videos
    are concatenated into one result.

    Args:
        input_video: Path to the video file to process.
        max_len: Forwarded unchanged to ``read_video_frames`` for every
            segment. NOTE(review): any frame cap it applies is therefore
            per segment, not for the whole video - confirm this is intended.
        target_fps: Forwarded to ``read_video_frames`` for every segment.
        max_res: Forwarded to ``read_video_frames`` for every segment.
        grayscale: Forwarded to ``save_video`` when writing depth segments.
        output_dir: Directory that receives the two result videos; created
            if it does not exist.
        input_size: Forwarded to the model's ``infer_video_depth``.

    Returns:
        ``[processed_video_path, depth_vis_path]``: the re-encoded source
        video and the depth visualisation, both inside ``output_dir``.

    Raises:
        gr.Error: If no input video was given or it contains no frames.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import tempfile

    if not input_video:
        raise gr.Error("Please provide an input video.")

    os.makedirs(output_dir, exist_ok=True)

    video_stem = os.path.splitext(os.path.basename(input_video))[0]
    processed_video_path = os.path.join(output_dir, video_stem + '_src.mp4')
    depth_vis_path = os.path.join(output_dir, video_stem + '_vis.mp4')

    # Number of source frames per segment; adjust to the available GPU memory.
    frames_per_segment = 45

    # A unique scratch directory per call keeps concurrent requests from
    # overwriting each other's intermediate segment files.
    scratch_dir = tempfile.mkdtemp(prefix='segments_', dir=output_dir)
    try:
        # Cut the source into fixed-size segments on disk.
        segment_paths = []
        clip = VideoFileClip(input_video)
        try:
            source_fps = clip.fps
            total_frames = int(clip.duration * source_fps)
            for start_frame in range(0, total_frames, frames_per_segment):
                end_frame = min(start_frame + frames_per_segment, total_frames)
                segment_path = os.path.join(scratch_dir, f'segment_{start_frame}.mp4')
                segment = clip.subclip(start_frame / source_fps, end_frame / source_fps)
                segment.write_videofile(segment_path, codec='libx264')
                segment_paths.append(segment_path)
        finally:
            clip.close()

        if not segment_paths:
            raise gr.Error("The input video contains no frames.")

        # Save the processed video (the concatenated segments).
        _concatenate_video_files(segment_paths, processed_video_path)

        # Run depth inference on each segment independently.
        depth_paths = []
        for segment_path in segment_paths:
            # Always pass the caller's requested FPS rather than whatever the
            # previous segment returned.
            frames, segment_fps = read_video_frames(segment_path, max_len, target_fps, max_res)
            print("frame length", len(frames))
            depths, depth_fps = video_depth_anything.infer_video_depth(
                frames, segment_fps, input_size=input_size, device=DEVICE
            )
            depth_path = os.path.join(scratch_dir, f'depth_{os.path.basename(segment_path)}')
            save_video(depths, depth_path, fps=depth_fps, is_depths=True, grayscale=grayscale)
            depth_paths.append(depth_path)

        # Merge the per-segment depth videos into the final visualisation.
        _concatenate_video_files(depth_paths, depth_vis_path)
    finally:
        # Remove intermediates and release memory even when a step failed.
        shutil.rmtree(scratch_dir, ignore_errors=True)
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return [processed_video_path, depth_vis_path]
114
+
115
def construct_demo():
    """Build and return the Gradio Blocks UI for the depth demo.

    The page has an input video next to two read-only output players, a
    collapsed "Advanced Settings" accordion, a Generate button and an
    examples gallery. Both the button and the examples call
    ``infer_video_depth``.

    Returns:
        The ``gr.Blocks`` app, not yet queued or launched.
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown(title)
        gr.Markdown(description)
        # The link label must not be wrapped in "$$": that is gr.Markdown's
        # default LaTeX delimiter and would render the label as display math.
        gr.Markdown("### If you find this work useful, please help ⭐ the [Github Repo](https://github.com/DepthAnything/Video-Depth-Anything). Thanks for your attention!")

        # Top row: input on the left, the two result players on the right.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                input_video = gr.Video(label="Input Video")

            with gr.Column(scale=2):
                with gr.Row(equal_height=True):
                    processed_video = gr.Video(
                        label="Preprocessed video",
                        interactive=False,
                        autoplay=True,
                        loop=True,
                        show_share_button=True,
                        scale=5,
                    )
                    depth_vis_video = gr.Video(
                        label="Generated Depth Video",
                        interactive=False,
                        autoplay=True,
                        loop=True,
                        show_share_button=True,
                        scale=5,
                    )

        # Second row: settings and the trigger button under the input column.
        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                with gr.Row(equal_height=False):
                    with gr.Accordion("Advanced Settings", open=False):
                        max_len = gr.Slider(
                            label="max process length",
                            minimum=-1,
                            maximum=1000,
                            value=500,
                            step=1,
                        )
                        target_fps = gr.Slider(
                            label="target FPS",
                            minimum=-1,
                            maximum=30,
                            value=15,
                            step=1,
                        )
                        max_res = gr.Slider(
                            label="max side resolution",
                            minimum=480,
                            maximum=1920,
                            value=1280,
                            step=1,
                        )
                        grayscale = gr.Checkbox(
                            label="grayscale",
                            value=False,
                        )
                generate_btn = gr.Button("Generate")
            # Empty spacer column so this row lines up with the one above.
            with gr.Column(scale=2):
                pass

        # Examples supply only the first four inputs; grayscale keeps the
        # default declared by infer_video_depth.
        gr.Examples(
            examples=examples,
            inputs=[
                input_video,
                max_len,
                target_fps,
                max_res
            ],
            outputs=[processed_video, depth_vis_video],
            fn=infer_video_depth,
            cache_examples="lazy",
        )

        generate_btn.click(
            fn=infer_video_depth,
            inputs=[
                input_video,
                max_len,
                target_fps,
                max_res,
                grayscale
            ],
            outputs=[processed_video, depth_vis_video],
        )

    return demo
203
+
204
if __name__ == "__main__":
    # Build the UI, enable request queuing, and serve with a public share link.
    demo = construct_demo()
    demo.queue().launch(share=True)