Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
11f5aeb
1
Parent(s):
cfd186d
cfg and steps params added
Browse files
app.py
CHANGED
@@ -19,6 +19,58 @@ from stable_audio_tools.inference.generation import generate_diffusion_cond
|
|
19 |
from gradio_client import Client, handle_file
|
20 |
from contextlib import contextmanager
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
# Global model storage
|
23 |
model_cache = {}
|
24 |
model_lock = threading.Lock()
|
@@ -90,7 +142,7 @@ def load_stable_audio_model():
|
|
90 |
model_cache['stable_audio_device'])
|
91 |
|
92 |
@spaces.GPU(duration=12)
|
93 |
-
def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
94 |
"""Generate a BPM-aware loop using stable-audio-open-small"""
|
95 |
try:
|
96 |
total_start = time.time()
|
@@ -105,7 +157,6 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
|
105 |
seconds_per_bar = seconds_per_beat * 4 # 4/4 time
|
106 |
target_loop_duration = seconds_per_bar * bars
|
107 |
|
108 |
-
|
109 |
# Enhance prompt based on loop type and BPM - minimal modification
|
110 |
if loop_type == "drums":
|
111 |
enhanced_prompt = f"{prompt} {bpm}bpm"
|
@@ -127,6 +178,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
|
127 |
print(f"🎵 Generating {loop_type} loop:")
|
128 |
print(f" Enhanced prompt: {enhanced_prompt}")
|
129 |
print(f" Target duration: {target_loop_duration:.2f}s ({bars} bars at {bpm}bpm)")
|
|
|
130 |
print(f" Seed: {seed}")
|
131 |
|
132 |
# Prepare conditioning
|
@@ -145,7 +197,6 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
|
145 |
# Generation timing
|
146 |
generation_start = time.time()
|
147 |
|
148 |
-
# Removed aggressive resource cleanup wrapper
|
149 |
# Clear GPU cache once before generation (not after)
|
150 |
# if device == "cuda":
|
151 |
# torch.cuda.empty_cache()
|
@@ -153,8 +204,8 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
|
153 |
with torch.cuda.amp.autocast(enabled=(device == "cuda")):
|
154 |
output = generate_diffusion_cond(
|
155 |
model,
|
156 |
-
steps=
|
157 |
-
cfg_scale=
|
158 |
conditioning=conditioning,
|
159 |
negative_conditioning=negative_conditioning,
|
160 |
sample_size=config["sample_size"],
|
@@ -203,7 +254,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
|
203 |
print(f" Total: {total_time:.2f}s")
|
204 |
print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
|
205 |
|
206 |
-
return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s"
|
207 |
|
208 |
except Exception as e:
|
209 |
print(f"❌ Generation error: {str(e)}")
|
@@ -340,6 +391,15 @@ def calculate_optimal_bars(bpm):
|
|
340 |
return bars
|
341 |
return 1
|
342 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
343 |
# ========== GRADIO INTERFACE ==========
|
344 |
|
345 |
with gr.Blocks(title="stable-melodyflow") as iface:
|
@@ -398,6 +458,7 @@ with gr.Blocks(title="stable-melodyflow") as iface:
|
|
398 |
- bpm-aware generation ensures perfect sync between loops (most the time lol)
|
399 |
- negative prompting separates drums from instruments (most the time)
|
400 |
- smart bar calculation optimizes loop length for the BPM
|
|
|
401 |
""")
|
402 |
|
403 |
# ========== GLOBAL CONTROLS ==========
|
@@ -425,6 +486,25 @@ with gr.Blocks(title="stable-melodyflow") as iface:
|
|
425 |
info="prompt applied to either loop. make it more drum/instrument specific for best results"
|
426 |
)
|
427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
# Auto-suggest optimal bars based on BPM
|
429 |
def update_suggested_bars(bpm):
|
430 |
optimal = calculate_optimal_bars(bpm)
|
@@ -475,11 +555,20 @@ with gr.Blocks(title="stable-melodyflow") as iface:
|
|
475 |
|
476 |
with gr.Row():
|
477 |
with gr.Column():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
478 |
transform_prompt = gr.Textbox(
|
479 |
label="transformation prompt",
|
480 |
-
value="
|
481 |
-
placeholder="
|
482 |
-
lines=
|
|
|
483 |
)
|
484 |
|
485 |
with gr.Column():
|
@@ -504,17 +593,24 @@ with gr.Blocks(title="stable-melodyflow") as iface:
|
|
504 |
|
505 |
# ========== EVENT HANDLERS ==========
|
506 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
507 |
# Generate drums
|
508 |
generate_drums_btn.click(
|
509 |
generate_stable_audio_loop,
|
510 |
-
inputs=[base_prompt, gr.State("drums"), global_bpm, global_bars, drums_seed],
|
511 |
outputs=[drums_audio, drums_status]
|
512 |
)
|
513 |
|
514 |
# Generate instruments
|
515 |
generate_instruments_btn.click(
|
516 |
generate_stable_audio_loop,
|
517 |
-
inputs=[base_prompt, gr.State("instruments"), global_bpm, global_bars, instruments_seed],
|
518 |
outputs=[instruments_audio, instruments_status]
|
519 |
)
|
520 |
|
@@ -531,20 +627,6 @@ with gr.Blocks(title="stable-melodyflow") as iface:
|
|
531 |
inputs=[combined_audio, transform_prompt, transform_solver, transform_flowstep],
|
532 |
outputs=[transformed_audio, transform_status]
|
533 |
)
|
534 |
-
|
535 |
-
# # ========== EXAMPLES ==========
|
536 |
-
# gr.Markdown("## 🎯 Example Workflows")
|
537 |
-
|
538 |
-
# examples = gr.Examples(
|
539 |
-
# examples=[
|
540 |
-
# ["techno", 128, 4, "aggressive industrial techno"],
|
541 |
-
# ["jazz", 110, 2, "smooth lo-fi jazz with vinyl crackle"],
|
542 |
-
# ["ambient", 90, 8, "ethereal ambient soundscape"],
|
543 |
-
# ["hip-hop", 100, 4, "classic boom bap hip-hop"],
|
544 |
-
# ["drum and bass", 140, 4, "liquid drum and bass"],
|
545 |
-
# ],
|
546 |
-
# inputs=[base_prompt, global_bpm, global_bars, transform_prompt],
|
547 |
-
# )
|
548 |
|
549 |
if __name__ == "__main__":
|
550 |
iface.launch()
|
|
|
19 |
from gradio_client import Client, handle_file
|
20 |
from contextlib import contextmanager
|
21 |
|
22 |
+
# MelodyFlow Variations - extracted from variations.py
|
23 |
+
MELODYFLOW_VARIATIONS = {
|
24 |
+
# Acoustic Instruments
|
25 |
+
'accordion_folk': "Lively accordion music with a European folk feeling, perfect for a travel documentary about traditional culture and street performances in Paris",
|
26 |
+
'banjo_bluegrass': "Authentic bluegrass banjo band performance with rich picking patterns, ideal for a heartfelt documentary about American rural life and traditional crafts",
|
27 |
+
'piano_classical': "Expressive classical piano performance with dynamic range and emotional depth, ideal for a luxury brand commercial",
|
28 |
+
'celtic': "Traditional Celtic arrangement with fiddle and flute, perfect for a documentary about Ireland's stunning landscapes and ancient traditions",
|
29 |
+
'strings_quartet': "Elegant string quartet arrangement with rich harmonies and expressive dynamics, perfect for wedding ceremony music",
|
30 |
+
|
31 |
+
# Synthesizer Variations
|
32 |
+
'synth_retro': "1980s style synthesizer melody with warm analog pads and arpeggios, perfect for a nostalgic sci-fi movie soundtrack",
|
33 |
+
'synth_modern': "Modern electronic production with crisp digital synthesizer arpeggios and vocoder effects, ideal for a tech product launch video",
|
34 |
+
'synth_ambient': "Atmospheric synthesizer pads with reverb and delay, perfect for a meditation app or wellness commercial",
|
35 |
+
'synth_edm': "High-energy EDM synth saw leads with sidechain compression, pitch bends, perfect for sports highlights or action sequences",
|
36 |
+
|
37 |
+
# Band Arrangements
|
38 |
+
'rock_band': "Full rock band arrangement with electric guitars, bass, and drums, perfect for an action movie trailer",
|
39 |
+
|
40 |
+
# Hybrid/Special
|
41 |
+
'cinematic_epic': "Epic orchestral arrangement with modern hybrid elements, synthesizers, and percussion, perfect for movie trailers",
|
42 |
+
'lofi_chill': "Lo-fi hip hop style with vinyl crackle, mellow piano, and tape saturation, perfect for study or focus playlists",
|
43 |
+
'synth_bass': "Deep analog synthesizer bassline with modern production and subtle modulation, perfect for electronic music production",
|
44 |
+
'retro_rpg': "16-bit era JRPG soundtrack with bright melodic synthesizers, orchestral elements, and adventurous themes, perfect for a fantasy video game battle scene or overworld exploration",
|
45 |
+
'steel_drums': "Vibrant Caribbean steel drum ensemble with tropical percussion and uplifting melodies, perfect for a beach resort commercial or travel documentary",
|
46 |
+
'chiptune': "8-bit video game soundtrack with arpeggiated melodies and classic NES-style square waves, perfect for a retro platformer or action game",
|
47 |
+
'gamelan_fusion': "Indonesian gamelan ensemble with metallic percussion, gongs, and ethereal textures, perfect for a meditation app or spiritual documentary",
|
48 |
+
'music_box': "Delicate music box melody with gentle bell tones and ethereal ambiance, perfect for a children's lullaby or magical fantasy scene",
|
49 |
+
|
50 |
+
# Hip Hop / Trap Percussion
|
51 |
+
'trap_808': "808 bass",
|
52 |
+
'lo_fi_drums': "lofi hiphop percussion",
|
53 |
+
'boom_bap': "Classic 90s boom bap hip hop drums with punchy kicks, crisp snares, and jazz sample chops, perfect for documentary footage of urban street scenes and skateboarding",
|
54 |
+
'percussion_ensemble': "Rich percussive ensemble with djembe, congas, shakers, and tribal drums creating complex polyrhythms, perfect for nature documentaries about rainforests or ancient cultural rituals",
|
55 |
+
|
56 |
+
# Enhanced Electronic Music
|
57 |
+
'future_bass': "Energetic future bass with filtered supersaws, pitch-bending lead synths, heavy sidechain, and chopped vocal samples, perfect for extreme sports highlights or uplifting motivational content",
|
58 |
+
'synthwave_retro': "80s retrofuturistic synthwave with gated reverb drums, analog arpeggios, neon-bright lead synths and driving bass, perfect for cyberpunk-themed technology showcases or retro gaming montages",
|
59 |
+
'melodic_techno': "Hypnotic melodic techno with pulsing bass, atmospheric pads, and evolving synthesizer sequences with subtle filter modulation, ideal for timelapse footage of urban nightscapes or architectural showcases",
|
60 |
+
'dubstep_wobble': "Heavy dubstep with aggressive wobble bass, metallic synthesizers, distorted drops, and tension-building risers, perfect for action sequence transitions or gaming highlight reels",
|
61 |
+
|
62 |
+
# Glitchy Effects
|
63 |
+
'glitch_hop': "Glitch hop with stuttering sample slices, bit-crushed percussion, granular synthesis textures and digital artifacts, perfect for technology malfunction scenes or data visualization animations",
|
64 |
+
'digital_disruption': "Heavily glitched soundscape with digital artifacts, buffer errors, granular time stretching, and corrupted audio samples, ideal for cybersecurity themes or digital distortion transitions in tech presentations",
|
65 |
+
'circuit_bent': "Circuit-bent toy sounds with unpredictable pitch shifts, broken electronic tones, and hardware malfunction artifacts, perfect for creative coding demonstrations or innovative technology exhibitions",
|
66 |
+
|
67 |
+
# Experimental Hybrids
|
68 |
+
'orchestral_glitch': "Cinematic orchestral elements disrupted by digital glitches, granular textures, and temporal distortions, perfect for science fiction trailers or futuristic product reveals with contrasting classical and modern elements",
|
69 |
+
'vapor_drums': "Vaporwave drum processing with extreme pitch and time manipulation, reverb-drenched samples, and retro commercial music elements, ideal for nostalgic internet culture documentaries or retrofuturistic art installations",
|
70 |
+
'industrial_textures': "Harsh industrial soundscape with mechanical percussion, factory recordings, metallic impacts, and distorted synth drones, perfect for manufacturing process videos or dystopian urban environments",
|
71 |
+
'jungle_breaks': "High-energy jungle drum breaks with choppy breakbeat samples, deep sub bass, and dub reggae influences, perfect for fast-paced urban chase scenes or extreme sports montages"
|
72 |
+
}
|
73 |
+
|
74 |
# Global model storage
|
75 |
model_cache = {}
|
76 |
model_lock = threading.Lock()
|
|
|
142 |
model_cache['stable_audio_device'])
|
143 |
|
144 |
@spaces.GPU(duration=12)
|
145 |
+
def generate_stable_audio_loop(prompt, loop_type, bpm, bars, steps, cfg_scale, seed=-1):
|
146 |
"""Generate a BPM-aware loop using stable-audio-open-small"""
|
147 |
try:
|
148 |
total_start = time.time()
|
|
|
157 |
seconds_per_bar = seconds_per_beat * 4 # 4/4 time
|
158 |
target_loop_duration = seconds_per_bar * bars
|
159 |
|
|
|
160 |
# Enhance prompt based on loop type and BPM - minimal modification
|
161 |
if loop_type == "drums":
|
162 |
enhanced_prompt = f"{prompt} {bpm}bpm"
|
|
|
178 |
print(f"🎵 Generating {loop_type} loop:")
|
179 |
print(f" Enhanced prompt: {enhanced_prompt}")
|
180 |
print(f" Target duration: {target_loop_duration:.2f}s ({bars} bars at {bpm}bpm)")
|
181 |
+
print(f" Steps: {steps}, CFG Scale: {cfg_scale}")
|
182 |
print(f" Seed: {seed}")
|
183 |
|
184 |
# Prepare conditioning
|
|
|
197 |
# Generation timing
|
198 |
generation_start = time.time()
|
199 |
|
|
|
200 |
# Clear GPU cache once before generation (not after)
|
201 |
# if device == "cuda":
|
202 |
# torch.cuda.empty_cache()
|
|
|
204 |
with torch.cuda.amp.autocast(enabled=(device == "cuda")):
|
205 |
output = generate_diffusion_cond(
|
206 |
model,
|
207 |
+
steps=steps, # User-configurable steps
|
208 |
+
cfg_scale=cfg_scale, # User-configurable CFG scale
|
209 |
conditioning=conditioning,
|
210 |
negative_conditioning=negative_conditioning,
|
211 |
sample_size=config["sample_size"],
|
|
|
254 |
print(f" Total: {total_time:.2f}s")
|
255 |
print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
|
256 |
|
257 |
+
return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s (steps: {steps}, cfg: {cfg_scale})"
|
258 |
|
259 |
except Exception as e:
|
260 |
print(f"❌ Generation error: {str(e)}")
|
|
|
391 |
return bars
|
392 |
return 1
|
393 |
|
394 |
+
def update_transform_prompt(variation_choice):
    """Return the textbox update matching the chosen transformation preset.

    "custom" clears the textbox and shows the custom-prompt placeholder; a key
    of MELODYFLOW_VARIATIONS fills the textbox with that preset's prompt; any
    other value clears the textbox and shows the generic placeholder. The
    update keeps the textbox interactive in every case.
    """
    # "custom" is tested first so it is handled before any preset lookup.
    if variation_choice == "custom":
        return gr.update(
            value="",
            placeholder="enter your custom transformation prompt",
            interactive=True,
        )

    # Known preset: copy its prompt text into the textbox.
    if variation_choice in MELODYFLOW_VARIATIONS:
        preset_prompt = MELODYFLOW_VARIATIONS[variation_choice]
        return gr.update(value=preset_prompt, interactive=True)

    # Anything else (unknown or empty selection) falls back to a blank prompt.
    return gr.update(
        value="",
        placeholder="select a variation or enter custom prompt",
        interactive=True,
    )
|
402 |
+
|
403 |
# ========== GRADIO INTERFACE ==========
|
404 |
|
405 |
with gr.Blocks(title="stable-melodyflow") as iface:
|
|
|
458 |
- bpm-aware generation ensures perfect sync between loops (most the time lol)
|
459 |
- negative prompting separates drums from instruments (most the time)
|
460 |
- smart bar calculation optimizes loop length for the BPM
|
461 |
+
- preset transformation styles for braindead ease of use
|
462 |
""")
|
463 |
|
464 |
# ========== GLOBAL CONTROLS ==========
|
|
|
486 |
info="prompt applied to either loop. make it more drum/instrument specific for best results"
|
487 |
)
|
488 |
|
489 |
+
with gr.Row():
|
490 |
+
generation_steps = gr.Slider(
|
491 |
+
label="generation steps",
|
492 |
+
minimum=4,
|
493 |
+
maximum=16,
|
494 |
+
step=1,
|
495 |
+
value=8,
|
496 |
+
info="more steps = higher quality but slower generation"
|
497 |
+
)
|
498 |
+
|
499 |
+
cfg_scale = gr.Slider(
|
500 |
+
label="cfg scale",
|
501 |
+
minimum=0.5,
|
502 |
+
maximum=2.0,
|
503 |
+
step=0.1,
|
504 |
+
value=1.0,
|
505 |
+
info="higher values = more prompt adherence but potentially less natural"
|
506 |
+
)
|
507 |
+
|
508 |
# Auto-suggest optimal bars based on BPM
|
509 |
def update_suggested_bars(bpm):
|
510 |
optimal = calculate_optimal_bars(bpm)
|
|
|
555 |
|
556 |
with gr.Row():
|
557 |
with gr.Column():
|
558 |
+
# Variation dropdown
|
559 |
+
variation_choice = gr.Dropdown(
|
560 |
+
label="transformation style preset",
|
561 |
+
choices=["custom"] + list(MELODYFLOW_VARIATIONS.keys()),
|
562 |
+
value="custom",
|
563 |
+
info="select a preset style or choose 'custom' for your own prompt"
|
564 |
+
)
|
565 |
+
|
566 |
transform_prompt = gr.Textbox(
|
567 |
label="transformation prompt",
|
568 |
+
value="",
|
569 |
+
placeholder="enter your custom transformation prompt",
|
570 |
+
lines=3,
|
571 |
+
info="describes the style transformation to apply"
|
572 |
)
|
573 |
|
574 |
with gr.Column():
|
|
|
593 |
|
594 |
# ========== EVENT HANDLERS ==========
|
595 |
|
596 |
+
# Update transform prompt when variation is selected
|
597 |
+
variation_choice.change(
|
598 |
+
update_transform_prompt,
|
599 |
+
inputs=[variation_choice],
|
600 |
+
outputs=[transform_prompt]
|
601 |
+
)
|
602 |
+
|
603 |
# Generate drums
|
604 |
generate_drums_btn.click(
|
605 |
generate_stable_audio_loop,
|
606 |
+
inputs=[base_prompt, gr.State("drums"), global_bpm, global_bars, generation_steps, cfg_scale, drums_seed],
|
607 |
outputs=[drums_audio, drums_status]
|
608 |
)
|
609 |
|
610 |
# Generate instruments
|
611 |
generate_instruments_btn.click(
|
612 |
generate_stable_audio_loop,
|
613 |
+
inputs=[base_prompt, gr.State("instruments"), global_bpm, global_bars, generation_steps, cfg_scale, instruments_seed],
|
614 |
outputs=[instruments_audio, instruments_status]
|
615 |
)
|
616 |
|
|
|
627 |
inputs=[combined_audio, transform_prompt, transform_solver, transform_flowstep],
|
628 |
outputs=[transformed_audio, transform_status]
|
629 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
630 |
|
631 |
if __name__ == "__main__":
|
632 |
iface.launch()
|