sandesh-bharadwaj committed on
Commit
3278a88
·
1 Parent(s): 0fd5831

Working gradio version of VidTune

Browse files
Files changed (1) hide show
  1. app.py +251 -0
app.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from engine import DescribeVideo, GenerateAudio
3
+ from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
4
+ from moviepy.audio.fx.volumex import volumex
5
+ import shutil, tempfile, os
6
+
7
# UI label of the "Select Video Descriptor" dropdown -> model identifier
# passed to DescribeVideo(model=...).
video_model_map = dict(Fast="flash", Quality="pro")
11
+
12
# UI label of the "Select Music Generator" dropdown -> model identifier
# passed to GenerateAudio(model=...).
music_model_map = dict(
    Fast="musicgen-stereo-small",
    Balanced="musicgen-stereo-medium",
    Quality="musicgen-stereo-large",
)
17
+
18
# Genre label shown in the UI -> genre text (None meaning "no genre").
# Most labels map to themselves; the few exceptions are patched in afterwards.
# dict.update() keeps the original insertion positions, so key order is stable.
genre_map = {
    label: label
    for label in (
        "None",
        "Pop",
        "Rock",
        "Hip Hop",
        "Jazz",
        "Classical",
        "Blues",
        "Country",
        "EDM",
        "Metal",
        "Disco",
        "Lo-Fi",
    )
}
genre_map.update(
    {
        "None": None,
        "Hip Hop": "Hip-Hop/Rap",
        "EDM": "Electronic/Dance",
    }
)
32
+
33
+ # Methods for Gradio state updates
34
def on_advanced_change(state):
    """Build the Accordion update for the advanced-settings toggle.

    Args:
        state: Current value of the toggle; used for both ``open`` and
            ``visible`` so the accordion is expanded exactly when it is shown.

    Returns:
        A ``gr.Accordion`` carrying the new open/visible flags.
    """
    accordion_update = gr.Accordion(visible=state, open=state)
    return accordion_update
36
+
37
def upload_file(file):
    """Show the freshly uploaded video in the preview player.

    Args:
        file: The upload delivered by the ``gr.UploadButton``. The button is
            declared with ``type="filepath"``, so this may be a plain path
            string; objects exposing a ``.name`` path attribute are accepted
            as well.

    Returns:
        A visible ``gr.Video`` pointing at the uploaded file.
    """
    # A plain ``str`` has no ``.name``; fall back to the value itself so both
    # path strings and file-like wrappers work.
    video_path = getattr(file, "name", file)
    return gr.Video(
        video_path,
        label=video_path,
        height=640,
        show_download_button=False,
        show_label=False,
        visible=True,
    )
39
+
40
def on_vdc_change(content):
    """Return a visible "Video Description" textbox holding ``content``."""
    textbox_options = {"label": "Video Description", "visible": True}
    return gr.Textbox(content, **textbox_options)
42
+
43
def on_mp_change(content):
    """Return a visible "Music Prompt" textbox holding ``content``."""
    textbox_options = {"label": "Music Prompt", "visible": True}
    return gr.Textbox(content, **textbox_options)
45
+
46
# Global state variables for Gradio
# NOTE(review): these are module-level, so every browser session served by
# this process reads and writes the same values -- confirm that is intended.
# Written by generate_video_description, read by generate_music.
video_duration = 0
# Written by generate_music, read by mix_music_with_video.
audio_paths = None
49
+
50
+
51
+ # Function to generate unique directory for each session
52
def create_session_dir():
    """Create a fresh temporary directory and return its path."""
    new_dir = tempfile.mkdtemp()
    return new_dir
54
+
55
+ # Function to clean up the session directory
56
def cleanup_session_dir():
    """Delete the module-level ``session_dir`` tree if it is present.

    Removal is best-effort: errors raised while deleting are ignored.
    """
    if not os.path.exists(session_dir):
        return
    shutil.rmtree(session_dir, ignore_errors=True)
59
+
60
def on_select_dropdown(value, evt: gr.EventData):
    """Show or hide the mixing controls according to the dropdown selection.

    Args:
        value: Index of the selected dropdown entry. Index 0 is the "None"
            entry, so only indices greater than 0 refer to a generated track.
            ``None`` or any non-integer value is treated as "nothing selected".
        evt: Event metadata supplied by Gradio; not used.

    Returns:
        A 3-tuple of updates: (original-audio volume slider,
        generated-music volume slider, mix button). All three are visible and
        interactive when a track is selected, hidden and inert otherwise.
    """
    # Guard the comparison: a cleared dropdown (None) or a string value would
    # make ``value > 0`` raise TypeError.
    has_selection = isinstance(value, int) and value > 0

    def volume_slider(label):
        # Both sliders share every setting except their label.
        return gr.Slider(
            minimum=0,
            maximum=200,
            value=100,
            label=label,
            visible=has_selection,
            interactive=has_selection,
            step=1,
        )

    if has_selection:
        mix_music_button = gr.Button("Add Generated Music to Video", visible=True, interactive=True)
    else:
        mix_music_button = gr.Button(visible=False, interactive=False)

    return (
        volume_slider("Original Audio Volume (%)"),
        volume_slider("Generated Music Volume (%)"),
        mix_music_button,
    )
69
+
70
+ # Video Description Generation
71
def generate_video_description(video_descriptor, google_api_key, toggle_advanced, video_file, genre, bpm, user_keywords):
    """Describe the uploaded video and derive a music prompt from it.

    Also stores the video's duration in the module-level ``video_duration``
    so that ``generate_music`` can request audio of matching length.

    Args:
        video_descriptor: Key into ``video_model_map`` ("Fast" or "Quality").
        google_api_key: API key forwarded to ``DescribeVideo``.
        toggle_advanced: When falsy, ``genre``, ``bpm`` and ``user_keywords``
            are ignored and ``None`` is sent for each of them.
        video_file: Path of the uploaded video, or ``None`` if none was uploaded.
        genre: Genre label chosen in the UI.
        bpm: Beats-per-minute value chosen in the UI.
        user_keywords: Free-text keywords entered by the user.

    Returns:
        Tuple ``(content_description, music_prompt)`` taken from the
        "Content Description" and "Music Prompt" entries of the result.

    Raises:
        gr.Error: If the API key or the video is missing, or if describing
            the video fails for any reason.
    """
    global video_duration

    # Validate outside the try block so these messages reach the user verbatim
    # instead of being re-wrapped by the generic handler below.
    if not google_api_key or not google_api_key.strip():
        raise gr.Error("Please enter your Google API Key before continuing!")
    if video_file is None:
        raise gr.Error("Please upload a video before generating music.")

    try:
        descriptor = DescribeVideo(
            model=video_model_map[video_descriptor], google_api_key=google_api_key
        )

        if toggle_advanced:
            options = {
                # Translate the UI label through genre_map (e.g. "None" -> None).
                # NOTE(review): assumes describe_video expects the mapped genre
                # text rather than the raw label -- confirm against engine.
                "genre": genre_map.get(genre, genre),
                "bpm": bpm,
                "user_keywords": user_keywords,
            }
        else:
            options = {"genre": None, "bpm": None, "user_keywords": None}
        video_description = descriptor.describe_video(video_file, **options)

        # Only the duration is needed; close the clip so its reader is released.
        clip = VideoFileClip(video_file)
        try:
            video_duration = clip.duration
        finally:
            clip.close()

        content_description = video_description["Content Description"]
        music_prompt = video_description["Music Prompt"]
    except gr.Error:
        # Already user-facing; let it through unchanged.
        raise
    except Exception as e:
        # Format the exception into the message itself: a second positional
        # argument to gr.Error is not appended to the message text.
        raise gr.Error(f"Exception raised: {e}") from e

    gr.Info("Video Description generated successfully.")
    gr.Info("Music Prompt generated successfully.")

    # Return the updated states to update the UI
    return content_description, music_prompt
104
+
105
def generate_music(music_generator, music_prompt, num_samples):
    """Generate music samples for the prompt and expose them in the UI.

    Reads the module-level ``video_duration`` and ``session_dir`` and stores
    the saved file paths in the module-level ``audio_paths``.

    Args:
        music_generator: Key into ``music_model_map``.
        music_prompt: Text prompt used for every sample.
        num_samples: Number of samples to generate (coerced to ``int``).

    Returns:
        A list of component updates: one visible ``gr.Audio`` per generated
        file, hidden ``gr.Audio`` placeholders padding the list up to five
        players, and finally the selection ``gr.Dropdown``.

    Raises:
        gr.Error: If model loading, generation or saving fails.
    """
    global audio_paths

    # Must match the number of gr.Audio placeholders created in the layout.
    max_players = 5

    try:
        audio_generator = GenerateAudio(model=music_model_map[music_generator])
        if audio_generator.device == "cpu":
            gr.Warning("The music generator model is running on CPU. For faster results, consider using a GPU.")

        # int(): list repetition fails if the slider delivers a float.
        prompts = [music_prompt] * int(num_samples)
        audio_generator.generate_audio(prompts, duration=video_duration)

        # The directory is created once at start-up and removed by the unload
        # cleanup, so make sure it exists before writing into it.
        os.makedirs(session_dir, exist_ok=True)
        audio_paths = audio_generator.save_audio(audio_dir=session_dir)

        gr.Info("Music generated successfully.")

        show_players = [
            gr.Audio(visible=True, value=audio_path, show_label=False, scale=0.5)
            for audio_path in audio_paths
        ]
        hide_players = [gr.Audio(visible=False) for _ in range(max_players - len(show_players))]

        dropdown_choices = ["None"] + [f"Generated Music {i+1}" for i in range(len(show_players))]
        # type='index' makes downstream handlers receive the position in
        # dropdown_choices, where index 0 is the "None" entry.
        selections = gr.Dropdown(
            choices=dropdown_choices,
            visible=True,
            interactive=True,
            label="Select one of the generated audio files for further processing:",
            value="None",
            type='index',
        )

        return show_players + hide_players + [selections]
    except gr.Error:
        raise
    except Exception as e:
        # Format the exception into the message itself: a second positional
        # argument to gr.Error is not appended to the message text.
        raise gr.Error(f"Exception raised: {e}") from e
127
+
128
+
129
def mix_music_with_video(video_file, dropdown_index, orig_clip_vol, generated_audio_vol):
    """Mix the selected generated track into the video and render the result.

    Reads the module-level ``audio_paths`` and ``session_dir``; the rendered
    file is written to ``final_video.mp4`` inside ``session_dir``.

    Args:
        video_file: Path of the original video.
        dropdown_index: Index chosen in the selection dropdown. Index 0 is the
            "None" entry, so track ``i`` lives at ``audio_paths[i - 1]``.
        orig_clip_vol: Volume of the original audio, in percent.
        generated_audio_vol: Volume of the generated music, in percent.

    Returns:
        Tuple of a visible ``gr.Video`` showing the rendered file and a
        ``gr.DownloadButton`` pointing at it.

    Raises:
        gr.Error: If no video is available or no valid generated track is selected.
    """
    if video_file is None:
        raise gr.Error("Please upload a video before generating music.")
    # Reject index 0 / None explicitly: ``audio_paths[0 - 1]`` would otherwise
    # silently pick the last generated track.
    if (
        not audio_paths
        or not isinstance(dropdown_index, int)
        or not 0 < dropdown_index <= len(audio_paths)
    ):
        raise gr.Error("Please select one of the generated audio files first.")

    # The directory may have been removed by the unload cleanup.
    os.makedirs(session_dir, exist_ok=True)
    final_video_path = os.path.join(session_dir, "final_video.mp4")

    orig_clip = VideoFileClip(video_file)
    generated_audio = None
    try:
        generated_audio = AudioFileClip(audio_paths[dropdown_index - 1])

        tracks = []
        # Videos without an audio stream expose ``audio`` as None.
        if orig_clip.audio is not None:
            tracks.append(volumex(orig_clip.audio, orig_clip_vol / 100))
        tracks.append(volumex(generated_audio, generated_audio_vol / 100))

        orig_clip.audio = CompositeAudioClip(tracks)
        orig_clip.write_videofile(final_video_path)
    finally:
        # Release the clips even when rendering fails.
        orig_clip.close()
        if generated_audio is not None:
            generated_audio.close()

    return (
        gr.Video(final_video_path, height=640, show_download_button=False, show_label=False, visible=True),
        gr.DownloadButton("Download final video", value=final_video_path, visible=True, interactive=True),
    )
159
+
160
+
161
# UI definition: a sidebar with settings and a main column with the video,
# generated text, audio players and mixing controls, followed by event wiring.
with gr.Blocks(delete_cache=(1800, 3600)) as demo:
    # Create the temp dir used for generated audio and the rendered video.
    # NOTE(review): this runs once while the UI is being built, so the
    # directory is shared by every session, and the unload handler registered
    # at the bottom removes it -- confirm whether per-session dirs are wanted.
    session_dir = create_session_dir()

    toggle_advanced = gr.State(False)
    with gr.Row():
        # Column scales are integers (2:7 keeps the original 1:3.5 ratio).
        with gr.Column(scale=2) as sideBar:
            google_api_key = gr.Textbox(label="Enter your Google API Key to get started:", info="https://ai.google.dev/gemini-api/docs/api-key", type="password")
            video_descriptor = gr.Dropdown(["Fast", "Quality"], label="Select Video Descriptor", value="Fast", interactive=True)
            music_generator = gr.Dropdown(["Fast", "Balanced", "Quality"], label="Select Music Generator", value="Fast", interactive=True)
            num_samples = gr.Slider(minimum=1, maximum=5, value=3, label="Number of samples", interactive=True, step=1)

            advanced_settings_btn = gr.Button("Advanced")
            with gr.Accordion(open=False, visible=False) as advanced_settings:
                bpm = gr.Slider(minimum=35, maximum=180, value=100, label="Beats Per Minute", interactive=True, step=1)
                # Choices come from genre_map so the labels stay in sync with it.
                genre = gr.Dropdown(
                    choices=list(genre_map),
                    value="None",
                    interactive=True,
                    label="Select Music Genre",
                )
                user_keywords = gr.Textbox(label="User Keywords", type="text", info="Enter keywords separated by commas")

            generate_music_btn = gr.Button("Generate Music")

            # The button flips the boolean state; the state change then
            # opens/closes the accordion.
            toggle_advanced.change(on_advanced_change, inputs=toggle_advanced, outputs=[advanced_settings])

            advanced_settings_btn.click(lambda x: not x, toggle_advanced, toggle_advanced)

        with gr.Column(scale=7) as MainWindow:
            gr.Image("assets/VidTune-Logo-Without-BG.png", width=200, interactive=False, show_download_button=False, show_label=False)
            gr.Markdown(
                """
                <div style="font-size: 35px; font-weight: bold;">VidTune: Where Videos Find Their Melody</div>
                <p>VidTune is a web application to effortlessly tailor perfect soundtracks for your videos with AI.</p>
                """,
            )
            uploaded_file = gr.UploadButton(label="Upload Video (Limit 200MB)", file_count="single", type="filepath", file_types=["video"])

            video_file = gr.Video(height=640, show_download_button=False, show_label=False, visible=False)

            video_description_box = gr.Textbox(label="Video Description", visible=True)
            music_prompt_box = gr.Textbox(label="Music Prompt", visible=True)

            # Five hidden players; generate_music reveals one per generated file.
            audio_players = [gr.Audio(visible=False) for _ in range(5)]
            # type="index" matches the update returned by generate_music and
            # the integer comparison in on_select_dropdown.
            audio_players_selections = gr.Dropdown(choices=["None"], visible=False, interactive=False, label="", type="index")

            orig_clip_vol = gr.Slider(minimum=0, maximum=200, value=100, label="Original Audio Volume (%)", visible=False, interactive=False, step=1)

            generated_audio_vol = gr.Slider(minimum=0, maximum=200, value=100, label="Generated Music Volume (%)", visible=False, interactive=False, step=1)

            mix_music_button = gr.Button(visible=False)

            output_video = gr.Video(height=640, show_download_button=False, show_label=False, visible=False)

            download_video_btn = gr.DownloadButton(visible=False, interactive=False)

    uploaded_file.upload(upload_file, uploaded_file, video_file)

    # Describe the video first; generate music only if that step succeeded.
    generate_music_btn.click(
        generate_video_description,
        inputs=[video_descriptor, google_api_key, toggle_advanced, video_file, genre, bpm, user_keywords],
        outputs=[video_description_box, music_prompt_box],
    ).success(
        generate_music,
        inputs=[music_generator, music_prompt_box, num_samples],
        outputs=[*audio_players, audio_players_selections],
    )

    audio_players_selections.select(on_select_dropdown, audio_players_selections, outputs=[orig_clip_vol, generated_audio_vol, mix_music_button])

    mix_music_button.click(
        mix_music_with_video,
        inputs=[video_file, audio_players_selections, orig_clip_vol, generated_audio_vol],
        outputs=[output_video, download_video_btn],
    )

    demo.unload(cleanup_session_dir)
248
+
249
+
250
# Launch the app only when this file is run as a script; the upload size
# limit is passed to Gradio as "200mb".
if __name__ == "__main__":
    demo.launch(max_file_size="200mb")