Spaces:
Sleeping
Sleeping
sandesh-bharadwaj
committed on
Commit
·
3278a88
1
Parent(s):
0fd5831
Working gradio version of VidTune
Browse files
app.py
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from engine import DescribeVideo, GenerateAudio
|
3 |
+
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
|
4 |
+
from moviepy.audio.fx.volumex import volumex
|
5 |
+
import shutil, tempfile, os
|
6 |
+
|
7 |
+
# UI label -> model identifier handed to DescribeVideo.
video_model_map = dict(
    Fast="flash",
    Quality="pro",
)
|
11 |
+
|
12 |
+
# UI label -> model identifier handed to GenerateAudio.
music_model_map = dict(
    Fast="musicgen-stereo-small",
    Balanced="musicgen-stereo-medium",
    Quality="musicgen-stereo-large",
)
|
17 |
+
|
18 |
+
# UI genre label -> genre value; the "None" label maps to Python None.
genre_map = dict(
    (
        ("None", None),
        ("Pop", "Pop"),
        ("Rock", "Rock"),
        ("Hip Hop", "Hip-Hop/Rap"),
        ("Jazz", "Jazz"),
        ("Classical", "Classical"),
        ("Blues", "Blues"),
        ("Country", "Country"),
        ("EDM", "Electronic/Dance"),
        ("Metal", "Metal"),
        ("Disco", "Disco"),
        ("Lo-Fi", "Lo-Fi"),
    )
)
|
32 |
+
|
33 |
+
# Methods for Gradio state updates
|
34 |
+
def on_advanced_change(state):
    """Return an accordion whose open and visible flags both equal ``state``."""
    accordion_props = {"open": state, "visible": state}
    return gr.Accordion(**accordion_props)
|
36 |
+
|
37 |
+
def upload_file(file):
    """Show the uploaded video in the preview player.

    Args:
        file: Value delivered by the upload button: either a plain path
            string or an object that exposes the path as ``.name``.

    Returns:
        A visible ``gr.Video`` whose value and label are the uploaded path.
    """
    # The upload button is created with type="filepath", so do not depend on
    # the value carrying a ``.name`` attribute; fall back to the value itself.
    video_path = getattr(file, "name", file)
    return gr.Video(
        video_path,
        label=video_path,
        height=640,
        show_download_button=False,
        show_label=False,
        visible=True,
    )
|
39 |
+
|
40 |
+
def on_vdc_change(content):
    """Build a visible "Video Description" textbox holding ``content``."""
    description_box = gr.Textbox(value=content, label="Video Description", visible=True)
    return description_box
|
42 |
+
|
43 |
+
def on_mp_change(content):
    """Build a visible "Music Prompt" textbox holding ``content``."""
    prompt_box = gr.Textbox(value=content, label="Music Prompt", visible=True)
    return prompt_box
|
45 |
+
|
46 |
+
# Global state variables for Gradio.
# NOTE(review): these live at module level, so every invocation of the
# handlers in this file reads and writes the same two values.

# Set by generate_video_description from the uploaded clip's ``duration`` and
# passed to generate_audio as the ``duration`` argument.
video_duration = 0
# Assigned in generate_music from save_audio's return value; indexed by
# mix_music_with_video. Stays None until music has been generated.
audio_paths = None
|
49 |
+
|
50 |
+
|
51 |
+
# Function to generate unique directory for each session
|
52 |
+
def create_session_dir():
    """Create a fresh temporary directory and return its path."""
    new_dir = tempfile.mkdtemp()
    return new_dir
|
54 |
+
|
55 |
+
# Function to clean up the session directory
|
56 |
+
def cleanup_session_dir():
    """Delete the module-level ``session_dir`` tree if it exists."""
    if not os.path.exists(session_dir):
        return
    # Best effort: removal errors are ignored.
    shutil.rmtree(session_dir, ignore_errors=True)
|
59 |
+
|
60 |
+
def on_select_dropdown(value, evt: gr.EventData):
    """Show or hide the mixing controls when the track dropdown changes.

    Args:
        value: Current dropdown value. The dropdown returned by
            ``generate_music`` uses ``type='index'``, so this is expected to
            be an integer position where 0 is the "None" entry.
        evt: Gradio event payload. Unused; kept so the handler signature
            stays the same.

    Returns:
        A tuple ``(original volume slider, generated volume slider, mix
        button)``. All three are visible and interactive when a generated
        track is selected, and hidden otherwise.
    """
    # Treat anything that is not a positive index (None, 0, a label string)
    # as "no track selected" instead of failing on the comparison.
    track_selected = isinstance(value, int) and value > 0

    def volume_slider(label):
        # Both sliders share every setting except their label.
        return gr.Slider(
            minimum=0,
            maximum=200,
            value=100,
            label=label,
            visible=track_selected,
            interactive=track_selected,
            step=1,
        )

    if track_selected:
        mix_button = gr.Button("Add Generated Music to Video", visible=True, interactive=True)
    else:
        mix_button = gr.Button(visible=False, interactive=False)

    return (
        volume_slider("Original Audio Volume (%)"),
        volume_slider("Generated Music Volume (%)"),
        mix_button,
    )
|
69 |
+
|
70 |
+
# Video Description Generation
|
71 |
+
def generate_video_description(video_descriptor, google_api_key, toggle_advanced, video_file, genre, bpm, user_keywords):
    """Describe the uploaded video and derive a music prompt from it.

    Also stores the clip's duration in the module-level ``video_duration``
    so that ``generate_music`` can request audio of the same length.

    Args:
        video_descriptor: Key of ``video_model_map`` selecting the model.
        google_api_key: API key forwarded to ``DescribeVideo``.
        toggle_advanced: When falsy, ``genre``, ``bpm`` and ``user_keywords``
            are ignored and ``None`` is passed for each.
        video_file: Path of the uploaded video, or ``None`` if nothing was
            uploaded.
        genre: Genre hint forwarded to ``describe_video``.
        bpm: Tempo hint forwarded to ``describe_video``.
        user_keywords: Keyword hint forwarded to ``describe_video``.

    Returns:
        A tuple ``(content description, music prompt)`` taken from the
        ``"Content Description"`` and ``"Music Prompt"`` entries of the
        ``describe_video`` result.

    Raises:
        gr.Error: If the API key or video is missing, or if any step fails.
    """
    global video_duration

    # Validate outside the try block so these messages reach the user as
    # written instead of being caught and re-wrapped below.
    if not google_api_key:
        raise gr.Error("Please enter your Google API Key before continuing!")
    if video_file is None:
        raise gr.Error("Please upload a video before generating music.")

    # The advanced hints only apply while the advanced panel is toggled on.
    if not toggle_advanced:
        genre = bpm = user_keywords = None
    # NOTE(review): ``genre`` is forwarded as the raw dropdown label; the
    # file also defines ``genre_map`` but it is not applied here. Confirm
    # which form describe_video expects.

    try:
        describer = DescribeVideo(
            model=video_model_map[video_descriptor], google_api_key=google_api_key
        )
        video_description = describer.describe_video(
            video_file,
            genre=genre,
            bpm=bpm,
            user_keywords=user_keywords,
        )

        # Only the duration is needed; close the clip so it is not leaked.
        clip = VideoFileClip(video_file)
        try:
            video_duration = clip.duration
        finally:
            clip.close()

        content_description = video_description["Content Description"]
        music_prompt = video_description["Music Prompt"]
    except gr.Error:
        raise
    except Exception as e:
        # gr.Error takes a single message string; put the cause in the message
        # rather than passing it as a second positional argument.
        raise gr.Error(f"Exception raised: {e}") from e

    gr.Info("Video Description generated successfully.")
    gr.Info("Music Prompt generated successfully.")

    # Return the updated states to update the UI
    return content_description, music_prompt
|
104 |
+
|
105 |
+
def generate_music(music_generator, music_prompt, num_samples):
    """Generate music samples for the current prompt and update the players.

    Reads the module-level ``video_duration`` and ``session_dir`` and stores
    the saved file paths in the module-level ``audio_paths``.

    Args:
        music_generator: Key of ``music_model_map`` selecting the model.
        music_prompt: Text prompt; it is repeated once per requested sample.
        num_samples: Number of samples to generate.

    Returns:
        A list of one visible ``gr.Audio`` per generated file, then hidden
        ``gr.Audio`` placeholders up to five players in total, then a
        ``gr.Dropdown`` (``type='index'``) for choosing one of the tracks.

    Raises:
        gr.Error: If model loading, generation or saving fails.
    """
    global audio_paths

    # The UI declares exactly this many audio player slots.
    max_players = 5

    try:
        audio_generator = GenerateAudio(model=music_model_map[music_generator])
        if audio_generator.device == "cpu":
            gr.Warning("The music generator model is running on CPU. For faster results, consider using a GPU.")

        prompts = [music_prompt] * int(num_samples)
        audio_generator.generate_audio(prompts, duration=video_duration)

        # cleanup_session_dir may already have removed the directory (it is
        # registered as the unload handler), so recreate it before saving.
        os.makedirs(session_dir, exist_ok=True)
        audio_paths = audio_generator.save_audio(audio_dir=session_dir)
    except gr.Error:
        raise
    except Exception as e:
        # gr.Error takes a single message string; put the cause in the message
        # rather than passing it as a second positional argument.
        raise gr.Error(f"Exception raised: {e}") from e

    gr.Info("Music generated successfully.")

    show_players = [
        gr.Audio(visible=True, value=audio_path, show_label=False, scale=0.5)
        for audio_path in audio_paths
    ]
    hide_players = [
        gr.Audio(visible=False) for _ in range(max_players - len(audio_paths))
    ]

    dropdown_choices = ["None"] + [
        f"Generated Music {number}" for number in range(1, len(show_players) + 1)
    ]
    selections = gr.Dropdown(
        choices=dropdown_choices,
        visible=True,
        interactive=True,
        label="Select one of the generated audio files for further processing:",
        value="None",
        type='index',
    )

    return show_players + hide_players + [selections]
|
127 |
+
|
128 |
+
|
129 |
+
def mix_music_with_video(video_file, dropdown_index, orig_clip_vol, generated_audio_vol):
    """Mix the selected generated track into the video and write the result.

    Reads the module-level ``audio_paths`` and ``session_dir``. The output is
    written to ``final_video.mp4`` inside ``session_dir``.

    Args:
        video_file: Path of the uploaded video.
        dropdown_index: Index chosen in the track dropdown; 0 is the "None"
            entry, so track ``k`` is ``audio_paths[k - 1]``.
        orig_clip_vol: Volume of the video's own audio, in percent.
        generated_audio_vol: Volume of the generated music, in percent.

    Returns:
        A tuple ``(gr.Video, gr.DownloadButton)``, both pointing at the
        written file.

    Raises:
        gr.Error: If no video, no generated music, or no valid track
            selection is available.
    """
    if video_file is None:
        raise gr.Error("Please upload a video before generating music.")
    if not audio_paths:
        raise gr.Error("Please generate music before adding it to the video.")
    # Index 0 ("None") would otherwise wrap around to audio_paths[-1].
    if not isinstance(dropdown_index, int) or not 1 <= dropdown_index <= len(audio_paths):
        raise gr.Error("Please select one of the generated audio files first.")

    # cleanup_session_dir may already have removed the directory (it is
    # registered as the unload handler), so recreate it before writing.
    os.makedirs(session_dir, exist_ok=True)
    final_video_path = os.path.join(session_dir, "final_video.mp4")

    orig_clip = VideoFileClip(video_file)
    music_clip = None
    try:
        music_clip = AudioFileClip(audio_paths[dropdown_index - 1])

        tracks = []
        # The uploaded video may have no audio track at all.
        if orig_clip.audio is not None:
            tracks.append(volumex(orig_clip.audio, float(orig_clip_vol / 100)))
        tracks.append(volumex(music_clip, float(generated_audio_vol / 100)))

        orig_clip.audio = CompositeAudioClip(tracks)
        orig_clip.write_videofile(final_video_path)
    finally:
        # Close both clips even when mixing or writing fails.
        orig_clip.close()
        if music_clip is not None:
            music_clip.close()

    return (
        gr.Video(final_video_path, height=640, show_download_button=False, show_label=False, visible=True),
        gr.DownloadButton("Download final video", value=final_video_path, visible=True, interactive=True),
    )
|
159 |
+
|
160 |
+
|
161 |
+
# Build the UI: a sidebar column with settings and a main column with the
# upload, preview, generated-audio and mixing widgets, then wire the events.
# NOTE(review): delete_cache=(1800, 3600) is presumably (check interval, max
# file age) in seconds — confirm against the Gradio Blocks documentation.
with gr.Blocks(delete_cache=(1800, 3600)) as demo:
    # Create the temp dir used for generated audio and the final video.
    # NOTE(review): this statement runs once while the UI is being built, so
    # the directory is one module-level value rather than one per session.
    session_dir = create_session_dir()

    # Boolean state backing the "Advanced" accordion.
    toggle_advanced = gr.State(False)
    with gr.Row():
        with gr.Column(scale=1) as sideBar:
            google_api_key = gr.Textbox(label="Enter your Google API Key to get started:", info="https://ai.google.dev/gemini-api/docs/api-key", type="password")
            video_descriptor = gr.Dropdown(["Fast", "Quality"], label="Select Video Descriptor", value="Fast", interactive=True)
            music_generator = gr.Dropdown(["Fast", "Balanced", "Quality"], label="Select Music Generator", value="Fast", interactive=True)
            # Maximum of 5 matches the number of audio players created below.
            num_samples = gr.Slider(minimum=1, maximum=5, value=3, label="Number of samples", interactive=True, step=1)

            advanced_settings_btn = gr.Button("Advanced")
            # Created closed and hidden; on_advanced_change sets both flags.
            with gr.Accordion(open=False, visible=False) as advanced_settings:
                bpm = gr.Slider(minimum=35, maximum=180, value=100, label="Beats Per Minute", interactive=True, step=1)
                genre = gr.Dropdown(choices=[
                    "None",
                    "Pop",
                    "Rock",
                    "Hip Hop",
                    "Jazz",
                    "Classical",
                    "Blues",
                    "Country",
                    "EDM",
                    "Metal",
                    "Disco",
                    "Lo-Fi"
                ], value="None", interactive=True, label="Select Music Genre"
                )
                user_keywords = gr.Textbox(label="User Keywords", type="text", info="Enter keywords separated by commas")

            generate_music_btn = gr.Button("Generate Music")

            # Whenever the state changes, update the accordion from it.
            toggle_advanced.change(on_advanced_change, inputs=toggle_advanced, outputs=[advanced_settings])

            # Each click on the button inverts the boolean state.
            advanced_settings_btn.click(lambda x: not x, toggle_advanced, toggle_advanced)


        with gr.Column(scale=3.5) as MainWindow:
            gr.Image("assets/VidTune-Logo-Without-BG.png", width=200, interactive=False, show_download_button=False, show_label=False)
            # The string body is kept unindented, exactly as the source text shows it.
            gr.Markdown(
                """
<div style="font-size: 35px; font-weight: bold;">VidTune: Where Videos Find Their Melody</div>
<p>VidTune is a web application to effortlessly tailor perfect soundtracks for your videos with AI.</p>
""",
            )
            uploaded_file = gr.UploadButton(label="Upload Video (Limit 200MB)", file_count="single", type="filepath", file_types=["video"])

            # Hidden until upload_file returns a visible player.
            video_file = gr.Video(height=640, show_download_button=False, show_label=False, visible=False)

            video_description_box = gr.Textbox(label="Video Description", visible=True)
            music_prompt_box = gr.Textbox(label="Music Prompt", visible=True)

            # Five player slots; generate_music returns exactly five
            # gr.Audio updates followed by the dropdown update.
            audio_players = [gr.Audio(visible=False) for _ in range(5)]
            audio_players_selections = gr.Dropdown(choices=["None"], visible=False, interactive=False, label="")

            # The mixing controls start hidden; on_select_dropdown toggles them.
            orig_clip_vol= gr.Slider(minimum=0, maximum=200, value=100, label="Original Audio Volume (%)", visible=False, interactive=False, step=1)

            generated_audio_vol = gr.Slider(minimum=0, maximum=200, value=100, label="Generated Music Volume (%)", visible=False, interactive=False, step=1)

            mix_music_button = gr.Button(visible=False)

            output_video = gr.Video(height=640, show_download_button=False, show_label=False, visible=False)

            download_video_btn = gr.DownloadButton(visible=False, interactive=False)

            uploaded_file.upload(upload_file, uploaded_file, video_file)

            # Describe the video first; generate music only if that succeeds.
            generate_music_btn.click(
                generate_video_description,
                inputs=[video_descriptor, google_api_key, toggle_advanced, video_file, genre, bpm, user_keywords],
                outputs=[video_description_box, music_prompt_box]
            ).success(generate_music,
                inputs=[music_generator, music_prompt_box, num_samples],
                outputs=[*audio_players, audio_players_selections])

            audio_players_selections.select(on_select_dropdown, audio_players_selections, outputs=[orig_clip_vol, generated_audio_vol,mix_music_button])

            mix_music_button.click(
                mix_music_with_video,
                inputs = [video_file, audio_players_selections, orig_clip_vol, generated_audio_vol],
                outputs=[output_video, download_video_btn]

            )

    # Remove the temp dir when the unload event fires.
    # NOTE(review): session_dir is a single module-level value, so this
    # removes the one directory that generate_music and mix_music_with_video
    # write to.
    demo.unload(cleanup_session_dir)
|
248 |
+
|
249 |
+
|
250 |
+
if __name__ == "__main__":
    # Start the app when run as a script; max_file_size uses the same 200 MB
    # figure as the upload button label.
    demo.launch(max_file_size="200mb")
|