ginipick committed on
Commit
c9e0bab
·
verified ·
1 Parent(s): e649a54

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +502 -29
app.py CHANGED
@@ -1,35 +1,508 @@
 
 
 
 
1
  import os
2
- import sys
3
- import streamlit as st
4
- from tempfile import NamedTemporaryFile
5
-
6
- def main():
7
- try:
8
- # Get the code from secrets
9
- code = os.environ.get("MAIN_CODE")
10
-
11
- if not code:
12
- st.error("⚠️ The application code wasn't found in secrets. Please add the MAIN_CODE secret.")
13
- return
14
-
15
- # Create a temporary Python file
16
- with NamedTemporaryFile(suffix='.py', delete=False, mode='w') as tmp:
17
- tmp.write(code)
18
- tmp_path = tmp.name
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- # Execute the code
21
- exec(compile(code, tmp_path, 'exec'), globals())
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- # Clean up the temporary file
24
- try:
25
- os.unlink(tmp_path)
26
- except:
27
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- except Exception as e:
30
- st.error(f"⚠️ Error loading or executing the application: {str(e)}")
31
- import traceback
32
- st.code(traceback.format_exc())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  if __name__ == "__main__":
35
- main()
 
1
+ import types
2
+ import random
3
+ import spaces
4
+ import logging
5
  import os
6
+ from pathlib import Path
7
+ from datetime import datetime
8
+
9
+ import torch
10
+ import numpy as np
11
+ import torchaudio
12
+ from diffusers import AutoencoderKLWan, UniPCMultistepScheduler
13
+ from diffusers.utils import export_to_video
14
+ from diffusers import AutoModel
15
+ import gradio as gr
16
+ import tempfile
17
+ from huggingface_hub import hf_hub_download
18
+
19
+ from src.pipeline_wan_nag import NAGWanPipeline
20
+ from src.transformer_wan_nag import NagWanTransformer3DModel
21
+
22
+ # MMAudio imports
23
try:
    import mmaudio
except ImportError:
    # Fallback: install the project in the current directory in editable mode
    # and retry. NOTE(review): presumably the mmaudio package is shipped in
    # this directory -- confirm against the repository layout.
    import importlib
    import subprocess
    import sys

    # Run pip through the interpreter that is executing this script so the
    # package lands in the same environment, and raise CalledProcessError on a
    # failed install instead of silently ignoring the exit status.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-e", "."])
    # Let the import system notice files created after the first attempt.
    importlib.invalidate_caches()
    import mmaudio
28
+
29
+ from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate as mmaudio_generate,
30
+ load_video, make_video, setup_eval_logging)
31
+ from mmaudio.model.flow_matching import FlowMatching
32
+ from mmaudio.model.networks import MMAudio, get_my_mmaudio
33
+ from mmaudio.model.sequence_config import SequenceConfig
34
+ from mmaudio.model.utils.features_utils import FeaturesUtils
35
+
36
# NAG Video Settings
# Requested height/width are floored to a multiple of MOD_VALUE in
# generate_video_with_audio; it is also the step of the size sliders.
MOD_VALUE = 32
DEFAULT_DURATION_SECONDS = 4
DEFAULT_STEPS = 4
DEFAULT_SEED = 2025
DEFAULT_H_SLIDER_VALUE = 480
DEFAULT_W_SLIDER_VALUE = 832
# NOTE(review): not referenced anywhere else in the visible code.
NEW_FORMULA_MAX_AREA = 480.0 * 832.0

# Bounds of the height/width sliders in the UI.
SLIDER_MIN_H, SLIDER_MAX_H = 128, 896
SLIDER_MIN_W, SLIDER_MAX_W = 128, 896
# Largest signed 32-bit integer; upper bound for the seed slider and for
# randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max

# Frame rate used both to derive the frame count and to export the video.
FIXED_FPS = 16
# The computed frame count is clipped to [MIN_FRAMES_MODEL, MAX_FRAMES_MODEL].
MIN_FRAMES_MODEL = 8
MAX_FRAMES_MODEL = 129

DEFAULT_NAG_NEGATIVE_PROMPT = "Static, motionless, still, ugly, bad quality, worst quality, poorly drawn, low resolution, blurry, lack of details"
DEFAULT_AUDIO_NEGATIVE_PROMPT = "music"

# NAG Model Settings
# MODEL_ID supplies the VAE and the pipeline; the transformer weights are
# loaded from the single-file checkpoint SUB_MODEL_ID/SUB_MODEL_FILENAME.
MODEL_ID = "Wan-AI/Wan2.1-T2V-14B-Diffusers"
SUB_MODEL_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
SUB_MODEL_FILENAME = "Wan14BT2VFusioniX_fp16_.safetensors"
# NOTE(review): the LoRA constants are not referenced in the visible code.
LORA_REPO_ID = "Kijai/WanVideo_comfy"
LORA_FILENAME = "Wan21_CausVid_14B_T2V_lora_rank32.safetensors"

# MMAudio Settings
# Allow TF32 for CUDA matmul and cuDNN operations.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
# getLogger() without a name returns the root logger.
log = logging.getLogger()
# Device and dtype used for the MMAudio network and its feature utilities.
device = 'cuda'
dtype = torch.bfloat16
audio_model_config: ModelConfig = all_model_cfg['large_44k_v2']
# Module-level side effects executed at import time.
# NOTE(review): going by its name, download_if_needed fetches missing
# checkpoint files -- confirm in mmaudio.eval_utils.
audio_model_config.download_if_needed()
setup_eval_logging()
72
+
73
# Initialize NAG Video Model
# Everything below runs at import time and places the pipeline on CUDA.
# The VAE is loaded in float32 while the transformer and the rest of the
# pipeline are loaded in bfloat16.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
# Transformer weights come from a separate single-file checkpoint rather than
# from MODEL_ID.
wan_path = hf_hub_download(repo_id=SUB_MODEL_ID, filename=SUB_MODEL_FILENAME)
transformer = NagWanTransformer3DModel.from_single_file(wan_path, torch_dtype=torch.bfloat16)
pipe = NAGWanPipeline.from_pretrained(
    MODEL_ID, vae=vae, transformer=transformer, torch_dtype=torch.bfloat16
)
# Replace the scheduler with UniPC built from the existing scheduler config,
# overriding flow_shift.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=5.0)
pipe.to("cuda")

# Patch the class of the pipeline's transformer so that it uses the NAG
# implementations of these three members.
# NOTE(review): presumably required because the loaded transformer's class does
# not already expose them -- confirm against src/transformer_wan_nag.py.
pipe.transformer.__class__.attn_processors = NagWanTransformer3DModel.attn_processors
pipe.transformer.__class__.set_attn_processor = NagWanTransformer3DModel.set_attn_processor
pipe.transformer.__class__.forward = NagWanTransformer3DModel.forward
86
+
87
+ # Initialize MMAudio Model
88
def get_mmaudio_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
    """Build the MMAudio network and its feature utilities.

    Everything is configured from the module-level ``audio_model_config`` and
    moved to the module-level ``device``/``dtype`` in eval mode.

    Returns:
        A ``(net, feature_utils, seq_cfg)`` tuple, where ``seq_cfg`` is the
        sequence configuration object held by ``audio_model_config``.
    """
    cfg = audio_model_config
    sequence_cfg = cfg.seq_cfg

    # Instantiate the network, then load its checkpoint (tensors only).
    model: MMAudio = get_my_mmaudio(cfg.model_name).to(device, dtype).eval()
    state_dict = torch.load(cfg.model_path, map_location=device, weights_only=True)
    model.load_weights(state_dict)
    log.info(f'Loaded MMAudio weights from {cfg.model_path}')

    utils = FeaturesUtils(
        tod_vae_ckpt=cfg.vae_path,
        synchformer_ckpt=cfg.synchformer_ckpt,
        enable_conditions=True,
        mode=cfg.mode,
        bigvgan_vocoder_ckpt=cfg.bigvgan_16k_path,
        need_vae_encoder=False,
    )
    utils = utils.to(device, dtype).eval()

    return model, utils, sequence_cfg
104
+
105
+ audio_net, audio_feature_utils, audio_seq_cfg = get_mmaudio_model()
106
+
107
+ # Audio generation function
108
@torch.inference_mode()
def add_audio_to_video(video_path, prompt, audio_negative_prompt, audio_steps, audio_cfg_strength, duration):
    """Generate audio for a video with MMAudio and write a new video containing it.

    Args:
        video_path: Path of the input video file.
        prompt: Text prompt passed to MMAudio.
        audio_negative_prompt: Negative text prompt passed to MMAudio.
        audio_steps: Number of flow-matching steps (``num_steps``).
        audio_cfg_strength: Guidance strength passed as ``cfg_strength``.
        duration: Duration in seconds passed to ``load_video``; afterwards the
            duration reported by the loaded video is used.

    Returns:
        Path of a newly created temporary ``.mp4`` file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Any exception raised by loading, generation or muxing is propagated;
        the placeholder output file is removed first when muxing fails.
    """
    rng = torch.Generator(device=device)
    rng.seed()  # Random seed for audio
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=audio_steps)

    video_info = load_video(video_path, duration)
    # Add a leading batch dimension of size one.
    clip_frames = video_info.clip_frames.unsqueeze(0)
    sync_frames = video_info.sync_frames.unsqueeze(0)

    # The module-level sequence config and network are updated in place to the
    # duration of this particular video.
    audio_seq_cfg.duration = video_info.duration_sec
    audio_net.update_seq_lengths(audio_seq_cfg.latent_seq_len, audio_seq_cfg.clip_seq_len, audio_seq_cfg.sync_seq_len)

    audios = mmaudio_generate(clip_frames,
                              sync_frames, [prompt],
                              negative_text=[audio_negative_prompt],
                              feature_utils=audio_feature_utils,
                              net=audio_net,
                              fm=fm,
                              rng=rng,
                              cfg_strength=audio_cfg_strength)
    audio = audios.float().cpu()[0]

    # Reserve an output path and close the handle right away; the original
    # code kept the file object open for the lifetime of the process.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as out_file:
        video_with_audio_path = out_file.name

    try:
        make_video(video_info, video_with_audio_path, audio, sampling_rate=audio_seq_cfg.sampling_rate)
    except Exception:
        # Do not leave the placeholder behind when muxing fails.
        try:
            os.remove(video_with_audio_path)
        except OSError:
            pass
        raise

    return video_with_audio_path
139
+
140
+ # Combined generation function
141
def get_duration(prompt, nag_negative_prompt, nag_scale, height, width, duration_seconds,
                 steps, seed, randomize_seed, enable_audio, audio_negative_prompt,
                 audio_steps, audio_cfg_strength):
    """Estimate the time budget, in seconds, for one generation request.

    This function is passed as ``duration=`` to the ``spaces.GPU`` decorator on
    ``generate_video_with_audio`` and takes the same parameters; only
    ``duration_seconds``, ``steps`` and ``enable_audio`` affect the result.

    Returns:
        ``int(duration_seconds) * int(steps) * 2.25 + 5``, plus 30 when audio
        generation is enabled.
    """
    budget = int(duration_seconds) * int(steps) * 2.25 + 5
    if enable_audio:
        # Additional time for audio processing.
        budget += 30
    return budget
148
+
149
@spaces.GPU(duration=get_duration)
def generate_video_with_audio(
    prompt,
    nag_negative_prompt, nag_scale,
    height=DEFAULT_H_SLIDER_VALUE, width=DEFAULT_W_SLIDER_VALUE, duration_seconds=DEFAULT_DURATION_SECONDS,
    steps=DEFAULT_STEPS,
    seed=DEFAULT_SEED, randomize_seed=False,
    enable_audio=True, audio_negative_prompt=DEFAULT_AUDIO_NEGATIVE_PROMPT,
    audio_steps=25, audio_cfg_strength=4.5,
):
    """Generate a video with the NAG pipeline and optionally add MMAudio audio.

    Args:
        prompt: Text prompt for the video; also used for audio generation.
        nag_negative_prompt: Negative prompt passed to the pipeline.
        nag_scale: NAG scale passed to the pipeline.
        height: Requested height, floored to a multiple of ``MOD_VALUE``.
        width: Requested width, floored to a multiple of ``MOD_VALUE``.
        duration_seconds: Clip length; converted to a frame count at
            ``FIXED_FPS`` and clipped to the model's frame limits.
        steps: Number of inference steps.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: Draw a random seed in ``[0, MAX_SEED]`` instead.
        enable_audio: Whether to run ``add_audio_to_video`` on the result.
        audio_negative_prompt: Negative prompt for audio generation.
        audio_steps: Number of audio generation steps.
        audio_cfg_strength: Guidance strength for audio generation.

    Returns:
        ``(video_path, seed)``: path of the resulting ``.mp4`` (with audio when
        enabled and successful, otherwise the silent video) and the seed that
        was actually used.
    """
    # Generate video first
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)

    # np.clip returns a NumPy scalar; hand the pipeline a plain int.
    requested_frames = int(round(int(duration_seconds) * FIXED_FPS) + 1)
    num_frames = int(np.clip(requested_frames, MIN_FRAMES_MODEL, MAX_FRAMES_MODEL))

    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    with torch.inference_mode():
        nag_output_frames_list = pipe(
            prompt=prompt,
            nag_negative_prompt=nag_negative_prompt,
            nag_scale=nag_scale,
            nag_tau=3.5,
            nag_alpha=0.5,
            height=target_h, width=target_w, num_frames=num_frames,
            guidance_scale=0.,
            num_inference_steps=int(steps),
            generator=torch.Generator(device="cuda").manual_seed(current_seed)
        ).frames[0]

    # Save initial video without audio. The temporary file only reserves a
    # path; the handle is closed before the exporter writes to it.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
        temp_video_path = tmpfile.name
    export_to_video(nag_output_frames_list, temp_video_path, fps=FIXED_FPS)

    if not enable_audio:
        return temp_video_path, current_seed

    try:
        final_video_path = add_audio_to_video(
            temp_video_path,
            prompt,  # Use the same prompt for audio generation
            audio_negative_prompt,
            audio_steps,
            audio_cfg_strength,
            duration_seconds
        )
    except Exception as e:
        # Deliberate best-effort boundary: any audio failure falls back to the
        # silent video. log.exception keeps the traceback for diagnosis.
        log.exception("Audio generation failed: %s", e)
        return temp_video_path, current_seed

    # Clean up the silent intermediate. This is kept outside the try block
    # above so that a cleanup problem is not reported as an audio failure and
    # does not discard the finished video.
    try:
        os.remove(temp_video_path)
    except FileNotFoundError:
        pass
    except OSError as e:
        log.warning("Could not remove temporary video %s: %s", temp_video_path, e)

    return final_video_path, current_seed
206
+
207
+ # Example generation function
208
def generate_with_example(prompt, nag_negative_prompt, nag_scale):
    """Run an example prompt with the default settings.

    Calls ``generate_video_with_audio`` with the default size, duration, steps
    and seed, with audio enabled and seed randomisation disabled.

    Returns:
        A 10-tuple: the video path, then the default height, width, duration
        and steps, the seed returned by the generator, and the audio settings
        that were used (enabled flag, negative prompt, steps, guidance).
    """
    example_audio_steps = 25
    example_audio_cfg = 4.5

    generation = generate_video_with_audio(
        prompt=prompt,
        nag_negative_prompt=nag_negative_prompt,
        nag_scale=nag_scale,
        height=DEFAULT_H_SLIDER_VALUE,
        width=DEFAULT_W_SLIDER_VALUE,
        duration_seconds=DEFAULT_DURATION_SECONDS,
        steps=DEFAULT_STEPS,
        seed=DEFAULT_SEED,
        randomize_seed=False,
        enable_audio=True,
        audio_negative_prompt=DEFAULT_AUDIO_NEGATIVE_PROMPT,
        audio_steps=example_audio_steps,
        audio_cfg_strength=example_audio_cfg,
    )
    video_path, seed = generation

    return (
        video_path,
        DEFAULT_H_SLIDER_VALUE,
        DEFAULT_W_SLIDER_VALUE,
        DEFAULT_DURATION_SECONDS,
        DEFAULT_STEPS,
        seed,
        True,
        DEFAULT_AUDIO_NEGATIVE_PROMPT,
        example_audio_steps,
        example_audio_cfg,
    )
223
+
224
+ # Examples with audio descriptions
225
# Each entry is [prompt, video negative prompt, NAG scale], matching the
# inputs that gr.Examples feeds to generate_with_example.
# The prompt text is sent to the models verbatim, so the mis-encoded
# characters found in these literals ("CO₂", "’", "é", "–") are restored.
examples = [
    ["Midnight highway outside a neon-lit city. A black 1973 Porsche 911 Carrera RS speeds at 120 km/h. Inside, a stylish singer-guitarist sings while driving, vintage sunburst guitar on the passenger seat. Sodium streetlights streak over the hood; RGB panels shift magenta to blue on the driver. Camera: drone dive, Russian-arm low wheel shot, interior gimbal, FPV barrel roll, overhead spiral. Neo-noir palette, rain-slick asphalt reflections, roaring flat-six engine blended with live guitar.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
    ["Arena rock concert packed with 20 000 fans. A flamboyant lead guitarist in leather jacket and mirrored aviators shreds a cherry-red Flying V on a thrust stage. Pyro flames shoot up on every downbeat, CO₂ jets burst behind. Moving-head spotlights swirl teal and amber, follow-spots rim-light the guitarist’s hair. Steadicam 360-orbit, crane shot rising over crowd, ultra-slow-motion pick attack at 1 000 fps. Film-grain teal-orange grade, thunderous crowd roar mixes with screaming guitar solo.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
    ["Golden-hour countryside road winding through rolling wheat fields. A man and woman ride a vintage café-racer motorcycle, hair and scarf fluttering in the warm breeze. Drone chase shot reveals endless patchwork farmland; low slider along rear wheel captures dust trail. Sun-flare back-lights the riders, lens blooms on highlights. Soft acoustic rock underscore; engine rumble mixed at –8 dB. Warm pastel color grade, gentle film-grain for nostalgic vibe.", DEFAULT_NAG_NEGATIVE_PROMPT, 11],
]
230
+
231
+ # CSS styling
232
+ css = """
233
+ .container {
234
+ max-width: 1400px;
235
+ margin: auto;
236
+ padding: 20px;
237
+ }
238
+ .main-title {
239
+ text-align: center;
240
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
241
+ -webkit-background-clip: text;
242
+ -webkit-text-fill-color: transparent;
243
+ font-size: 2.5em;
244
+ font-weight: bold;
245
+ margin-bottom: 10px;
246
+ }
247
+ .subtitle {
248
+ text-align: center;
249
+ color: #6b7280;
250
+ margin-bottom: 30px;
251
+ }
252
+ .prompt-container {
253
+ background: linear-gradient(135deg, #f3f4f6 0%, #e5e7eb 100%);
254
+ border-radius: 15px;
255
+ padding: 20px;
256
+ margin-bottom: 20px;
257
+ box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
258
+ }
259
+ .generate-btn {
260
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
261
+ color: white;
262
+ font-size: 1.2em;
263
+ font-weight: bold;
264
+ padding: 15px 30px;
265
+ border-radius: 10px;
266
+ border: none;
267
+ cursor: pointer;
268
+ transition: all 0.3s ease;
269
+ width: 100%;
270
+ margin-top: 20px;
271
+ }
272
+ .generate-btn:hover {
273
+ transform: translateY(-2px);
274
+ box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4);
275
+ }
276
+ .video-output {
277
+ border-radius: 15px;
278
+ overflow: hidden;
279
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
280
+ background: #1a1a1a;
281
+ padding: 10px;
282
+ }
283
+ .settings-panel {
284
+ background: #f9fafb;
285
+ border-radius: 15px;
286
+ padding: 20px;
287
+ box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
288
+ }
289
+ .slider-container {
290
+ background: white;
291
+ padding: 15px;
292
+ border-radius: 10px;
293
+ margin-bottom: 15px;
294
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.05);
295
+ }
296
+ .info-box {
297
+ background: linear-gradient(135deg, #e0e7ff 0%, #c7d2fe 100%);
298
+ border-radius: 10px;
299
+ padding: 15px;
300
+ margin: 10px 0;
301
+ border-left: 4px solid #667eea;
302
+ }
303
+ .audio-settings {
304
+ background: linear-gradient(135deg, #fef3c7 0%, #fde68a 100%);
305
+ border-radius: 10px;
306
+ padding: 15px;
307
+ margin-top: 10px;
308
+ border-left: 4px solid #f59e0b;
309
+ }
310
+ """
311
+
312
+ # Gradio interface
313
+ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
314
+ with gr.Column(elem_classes="container"):
315
+ gr.HTML("""
316
+ <h1 class="main-title">🎬 VEO3 Free</h1>
317
+ <p class="subtitle">Wan2.1-T2V-14B + Fast 4-step with NAG + Automatic Audio Generation</p>
318
+ """)
319
 
320
+
321
+ gr.HTML(
322
+ """
323
+ <div class='container' style='display:flex; justify-content:center; gap:12px;'>
324
+ <a href="https://huggingface.co/spaces/openfree/Best-AI" target="_blank">
325
+ <img src="https://img.shields.io/static/v1?label=OpenFree&message=BEST%20AI%20Services&color=%230000ff&labelColor=%23000080&logo=huggingface&logoColor=%23ffa500&style=for-the-badge" alt="OpenFree badge">
326
+ </a>
327
+
328
+ <a href="https://discord.gg/openfreeai" target="_blank">
329
+ <img src="https://img.shields.io/static/v1?label=Discord&message=Openfree%20AI&color=%230000ff&labelColor=%23800080&logo=discord&logoColor=white&style=for-the-badge" alt="Discord badge">
330
+ </a>
331
+ </div>
332
+ """
333
+ )
334
+
335
 
336
+ with gr.Row():
337
+ with gr.Column(scale=1):
338
+ with gr.Group(elem_classes="prompt-container"):
339
+ prompt = gr.Textbox(
340
+ label="✨ Video Prompt (also used for audio generation)",
341
+ placeholder="Describe your video scene in detail...",
342
+ lines=3,
343
+ elem_classes="prompt-input"
344
+ )
345
+
346
+ with gr.Accordion("🎨 Advanced Video Settings", open=False):
347
+ nag_negative_prompt = gr.Textbox(
348
+ label="Video Negative Prompt",
349
+ value=DEFAULT_NAG_NEGATIVE_PROMPT,
350
+ lines=2,
351
+ )
352
+ nag_scale = gr.Slider(
353
+ label="NAG Scale",
354
+ minimum=1.0,
355
+ maximum=20.0,
356
+ step=0.25,
357
+ value=11.0,
358
+ info="Higher values = stronger guidance"
359
+ )
360
+
361
+ with gr.Group(elem_classes="settings-panel"):
362
+ gr.Markdown("### βš™οΈ Video Settings")
363
+
364
+ with gr.Row():
365
+ duration_seconds_input = gr.Slider(
366
+ minimum=1,
367
+ maximum=8,
368
+ step=1,
369
+ value=DEFAULT_DURATION_SECONDS,
370
+ label="πŸ“± Duration (seconds)",
371
+ elem_classes="slider-container"
372
+ )
373
+ steps_slider = gr.Slider(
374
+ minimum=1,
375
+ maximum=8,
376
+ step=1,
377
+ value=DEFAULT_STEPS,
378
+ label="πŸ”„ Inference Steps",
379
+ elem_classes="slider-container"
380
+ )
381
+
382
+ with gr.Row():
383
+ height_input = gr.Slider(
384
+ minimum=SLIDER_MIN_H,
385
+ maximum=SLIDER_MAX_H,
386
+ step=MOD_VALUE,
387
+ value=DEFAULT_H_SLIDER_VALUE,
388
+ label=f"πŸ“ Height (Γ—{MOD_VALUE})",
389
+ elem_classes="slider-container"
390
+ )
391
+ width_input = gr.Slider(
392
+ minimum=SLIDER_MIN_W,
393
+ maximum=SLIDER_MAX_W,
394
+ step=MOD_VALUE,
395
+ value=DEFAULT_W_SLIDER_VALUE,
396
+ label=f"πŸ“ Width (Γ—{MOD_VALUE})",
397
+ elem_classes="slider-container"
398
+ )
399
+
400
+ with gr.Row():
401
+ seed_input = gr.Slider(
402
+ label="🌱 Seed",
403
+ minimum=0,
404
+ maximum=MAX_SEED,
405
+ step=1,
406
+ value=DEFAULT_SEED,
407
+ interactive=True
408
+ )
409
+ randomize_seed_checkbox = gr.Checkbox(
410
+ label="🎲 Random Seed",
411
+ value=True,
412
+ interactive=True
413
+ )
414
+
415
+ with gr.Group(elem_classes="audio-settings"):
416
+ gr.Markdown("### 🎡 Audio Generation Settings")
417
+
418
+ enable_audio = gr.Checkbox(
419
+ label="πŸ”Š Enable Automatic Audio Generation",
420
+ value=True,
421
+ interactive=True
422
+ )
423
+
424
+ with gr.Column(visible=True) as audio_settings_group:
425
+ audio_negative_prompt = gr.Textbox(
426
+ label="Audio Negative Prompt",
427
+ value=DEFAULT_AUDIO_NEGATIVE_PROMPT,
428
+ placeholder="Elements to avoid in audio (e.g., music, speech)",
429
+ )
430
+
431
+ with gr.Row():
432
+ audio_steps = gr.Slider(
433
+ minimum=10,
434
+ maximum=50,
435
+ step=5,
436
+ value=25,
437
+ label="🎚️ Audio Steps",
438
+ info="More steps = better quality"
439
+ )
440
+ audio_cfg_strength = gr.Slider(
441
+ minimum=1.0,
442
+ maximum=10.0,
443
+ step=0.5,
444
+ value=4.5,
445
+ label="πŸŽ›οΈ Audio Guidance",
446
+ info="Strength of prompt guidance"
447
+ )
448
+
449
+ # Toggle audio settings visibility
450
+ enable_audio.change(
451
+ fn=lambda x: gr.update(visible=x),
452
+ inputs=[enable_audio],
453
+ outputs=[audio_settings_group]
454
+ )
455
+
456
+ generate_button = gr.Button(
457
+ "🎬 Generate Video with Audio",
458
+ variant="primary",
459
+ elem_classes="generate-btn"
460
+ )
461
 
462
+ with gr.Column(scale=1):
463
+ video_output = gr.Video(
464
+ label="Generated Video with Audio",
465
+ autoplay=True,
466
+ interactive=False,
467
+ elem_classes="video-output"
468
+ )
469
+
470
+ gr.HTML("""
471
+ <div style="text-align: center; margin-top: 20px; color: #6b7280;">
472
+ <p>πŸ’‘ Tip: The same prompt is used for both video and audio generation!</p>
473
+ <p>🎧 Audio is automatically matched to the visual content</p>
474
+ </div>
475
+ """)
476
+
477
+ gr.Markdown("### 🎯 Example Prompts")
478
+ gr.Examples(
479
+ examples=examples,
480
+ fn=generate_with_example,
481
+ inputs=[prompt, nag_negative_prompt, nag_scale],
482
+ outputs=[
483
+ video_output,
484
+ height_input, width_input, duration_seconds_input,
485
+ steps_slider, seed_input,
486
+ enable_audio, audio_negative_prompt, audio_steps, audio_cfg_strength
487
+ ],
488
+ cache_examples="lazy"
489
+ )
490
+
491
+ # Connect UI elements
492
+ ui_inputs = [
493
+ prompt,
494
+ nag_negative_prompt, nag_scale,
495
+ height_input, width_input, duration_seconds_input,
496
+ steps_slider,
497
+ seed_input, randomize_seed_checkbox,
498
+ enable_audio, audio_negative_prompt, audio_steps, audio_cfg_strength,
499
+ ]
500
+
501
+ generate_button.click(
502
+ fn=generate_video_with_audio,
503
+ inputs=ui_inputs,
504
+ outputs=[video_output, seed_input],
505
+ )
506
 
507
  if __name__ == "__main__":
508
+ demo.queue().launch()