Spaces:

thepatch
/

stable-melodyflow

Running on Zero

App Files Files Community

thecollabagepatch committed 1 day ago

Commit

11f5aeb

1 Parent(s): cfd186d

cfg and steps params added

Browse files

Files changed (1) hide show

app.py +107 -25

app.py CHANGED Viewed

@@ -19,6 +19,58 @@ from stable_audio_tools.inference.generation import generate_diffusion_cond
 from gradio_client import Client, handle_file
 from contextlib import contextmanager
 # Global model storage
 model_cache = {}
 model_lock = threading.Lock()
@@ -90,7 +142,7 @@ def load_stable_audio_model():
                 model_cache['stable_audio_device'])
 @spaces.GPU(duration=12)
-def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
     """Generate a BPM-aware loop using stable-audio-open-small"""
     try:
         total_start = time.time()
@@ -105,7 +157,6 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
         seconds_per_bar = seconds_per_beat * 4  # 4/4 time
         target_loop_duration = seconds_per_bar * bars
         # Enhance prompt based on loop type and BPM - minimal modification
         if loop_type == "drums":
             enhanced_prompt = f"{prompt} {bpm}bpm"
@@ -127,6 +178,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
         print(f"🎵 Generating {loop_type} loop:")
         print(f"   Enhanced prompt: {enhanced_prompt}")
         print(f"   Target duration: {target_loop_duration:.2f}s ({bars} bars at {bpm}bpm)")
         print(f"   Seed: {seed}")
         # Prepare conditioning
@@ -145,7 +197,6 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
         # Generation timing
         generation_start = time.time()
-        # Removed aggressive resource cleanup wrapper
         # Clear GPU cache once before generation (not after)
         # if device == "cuda":
         #     torch.cuda.empty_cache()
@@ -153,8 +204,8 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
         with torch.cuda.amp.autocast(enabled=(device == "cuda")):
             output = generate_diffusion_cond(
                 model,
-                steps=8,  # Fast generation
-                cfg_scale=1.0,  # Good balance for loops
                 conditioning=conditioning,
                 negative_conditioning=negative_conditioning,
                 sample_size=config["sample_size"],
@@ -203,7 +254,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
         print(f"   Total: {total_time:.2f}s")
         print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
-        return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s"
     except Exception as e:
         print(f"❌ Generation error: {str(e)}")
@@ -340,6 +391,15 @@ def calculate_optimal_bars(bpm):
             return bars
     return 1
 # ========== GRADIO INTERFACE ==========
 with gr.Blocks(title="stable-melodyflow") as iface:
@@ -398,6 +458,7 @@ with gr.Blocks(title="stable-melodyflow") as iface:
         - bpm-aware generation ensures perfect sync between loops (most the time lol)
         - negative prompting separates drums from instruments (most the time)
         - smart bar calculation optimizes loop length for the BPM
         """)
     # ========== GLOBAL CONTROLS ==========
@@ -425,6 +486,25 @@ with gr.Blocks(title="stable-melodyflow") as iface:
             info="prompt applied to either loop. make it more drum/instrument specific for best results"
         )
     # Auto-suggest optimal bars based on BPM
     def update_suggested_bars(bpm):
         optimal = calculate_optimal_bars(bpm)
@@ -475,11 +555,20 @@ with gr.Blocks(title="stable-melodyflow") as iface:
     with gr.Row():
         with gr.Column():
             transform_prompt = gr.Textbox(
                 label="transformation prompt",
-                value="aggressive industrial techno with distorted sounds",
-                placeholder="describe the style of transformation",
-                lines=2
             )
         with gr.Column():
@@ -504,17 +593,24 @@ with gr.Blocks(title="stable-melodyflow") as iface:
     # ========== EVENT HANDLERS ==========
     # Generate drums
     generate_drums_btn.click(
         generate_stable_audio_loop,
-        inputs=[base_prompt, gr.State("drums"), global_bpm, global_bars, drums_seed],
         outputs=[drums_audio, drums_status]
     )
     # Generate instruments
     generate_instruments_btn.click(
         generate_stable_audio_loop,
-        inputs=[base_prompt, gr.State("instruments"), global_bpm, global_bars, instruments_seed],
         outputs=[instruments_audio, instruments_status]
     )
@@ -531,20 +627,6 @@ with gr.Blocks(title="stable-melodyflow") as iface:
         inputs=[combined_audio, transform_prompt, transform_solver, transform_flowstep],
         outputs=[transformed_audio, transform_status]
     )
-    # # ========== EXAMPLES ==========
-    # gr.Markdown("## 🎯 Example Workflows")
-    # examples = gr.Examples(
-    #     examples=[
-    #         ["techno", 128, 4, "aggressive industrial techno"],
-    #         ["jazz", 110, 2, "smooth lo-fi jazz with vinyl crackle"],
-    #         ["ambient", 90, 8, "ethereal ambient soundscape"],
-    #         ["hip-hop", 100, 4, "classic boom bap hip-hop"],
-    #         ["drum and bass", 140, 4, "liquid drum and bass"],
-    #     ],
-    #     inputs=[base_prompt, global_bpm, global_bars, transform_prompt],
-    # )
 if __name__ == "__main__":
     iface.launch()

 from gradio_client import Client, handle_file
 from contextlib import contextmanager
+# MelodyFlow variation presets (extracted from variations.py): maps each preset key to the text prompt used for that transformation style
+MELODYFLOW_VARIATIONS = {
+    # Acoustic Instruments
+    'accordion_folk': "Lively accordion music with a European folk feeling, perfect for a travel documentary about traditional culture and street performances in Paris",
+    'banjo_bluegrass': "Authentic bluegrass banjo band performance with rich picking patterns, ideal for a heartfelt documentary about American rural life and traditional crafts",
+    'piano_classical': "Expressive classical piano performance with dynamic range and emotional depth, ideal for a luxury brand commercial",
+    'celtic': "Traditional Celtic arrangement with fiddle and flute, perfect for a documentary about Ireland's stunning landscapes and ancient traditions",
+    'strings_quartet': "Elegant string quartet arrangement with rich harmonies and expressive dynamics, perfect for wedding ceremony music",
+    # Synthesizer Variations
+    'synth_retro': "1980s style synthesizer melody with warm analog pads and arpeggios, perfect for a nostalgic sci-fi movie soundtrack",
+    'synth_modern': "Modern electronic production with crisp digital synthesizer arpeggios and vocoder effects, ideal for a tech product launch video",
+    'synth_ambient': "Atmospheric synthesizer pads with reverb and delay, perfect for a meditation app or wellness commercial",
+    'synth_edm': "High-energy EDM synth saw leads with sidechain compression, pitch bends, perfect for sports highlights or action sequences",
+    # Band Arrangements
+    'rock_band': "Full rock band arrangement with electric guitars, bass, and drums, perfect for an action movie trailer",
+    # Hybrid/Special
+    'cinematic_epic': "Epic orchestral arrangement with modern hybrid elements, synthesizers, and percussion, perfect for movie trailers",
+    'lofi_chill': "Lo-fi hip hop style with vinyl crackle, mellow piano, and tape saturation, perfect for study or focus playlists",
+    'synth_bass': "Deep analog synthesizer bassline with modern production and subtle modulation, perfect for electronic music production",
+    'retro_rpg': "16-bit era JRPG soundtrack with bright melodic synthesizers, orchestral elements, and adventurous themes, perfect for a fantasy video game battle scene or overworld exploration",
+    'steel_drums': "Vibrant Caribbean steel drum ensemble with tropical percussion and uplifting melodies, perfect for a beach resort commercial or travel documentary",
+    'chiptune': "8-bit video game soundtrack with arpeggiated melodies and classic NES-style square waves, perfect for a retro platformer or action game",
+    'gamelan_fusion': "Indonesian gamelan ensemble with metallic percussion, gongs, and ethereal textures, perfect for a meditation app or spiritual documentary",
+    'music_box': "Delicate music box melody with gentle bell tones and ethereal ambiance, perfect for a children's lullaby or magical fantasy scene",
+    # Hip Hop / Trap Percussion
+    'trap_808': "808 bass",
+    'lo_fi_drums': "lofi hiphop percussion",
+    'boom_bap': "Classic 90s boom bap hip hop drums with punchy kicks, crisp snares, and jazz sample chops, perfect for documentary footage of urban street scenes and skateboarding",
+    'percussion_ensemble': "Rich percussive ensemble with djembe, congas, shakers, and tribal drums creating complex polyrhythms, perfect for nature documentaries about rainforests or ancient cultural rituals",
+    # Enhanced Electronic Music
+    'future_bass': "Energetic future bass with filtered supersaws, pitch-bending lead synths, heavy sidechain, and chopped vocal samples, perfect for extreme sports highlights or uplifting motivational content",
+    'synthwave_retro': "80s retrofuturistic synthwave with gated reverb drums, analog arpeggios, neon-bright lead synths and driving bass, perfect for cyberpunk-themed technology showcases or retro gaming montages",
+    'melodic_techno': "Hypnotic melodic techno with pulsing bass, atmospheric pads, and evolving synthesizer sequences with subtle filter modulation, ideal for timelapse footage of urban nightscapes or architectural showcases",
+    'dubstep_wobble': "Heavy dubstep with aggressive wobble bass, metallic synthesizers, distorted drops, and tension-building risers, perfect for action sequence transitions or gaming highlight reels",
+    # Glitchy Effects
+    'glitch_hop': "Glitch hop with stuttering sample slices, bit-crushed percussion, granular synthesis textures and digital artifacts, perfect for technology malfunction scenes or data visualization animations",
+    'digital_disruption': "Heavily glitched soundscape with digital artifacts, buffer errors, granular time stretching, and corrupted audio samples, ideal for cybersecurity themes or digital distortion transitions in tech presentations",
+    'circuit_bent': "Circuit-bent toy sounds with unpredictable pitch shifts, broken electronic tones, and hardware malfunction artifacts, perfect for creative coding demonstrations or innovative technology exhibitions",
+    # Experimental Hybrids
+    'orchestral_glitch': "Cinematic orchestral elements disrupted by digital glitches, granular textures, and temporal distortions, perfect for science fiction trailers or futuristic product reveals with contrasting classical and modern elements",
+    'vapor_drums': "Vaporwave drum processing with extreme pitch and time manipulation, reverb-drenched samples, and retro commercial music elements, ideal for nostalgic internet culture documentaries or retrofuturistic art installations",
+    'industrial_textures': "Harsh industrial soundscape with mechanical percussion, factory recordings, metallic impacts, and distorted synth drones, perfect for manufacturing process videos or dystopian urban environments",
+    'jungle_breaks': "High-energy jungle drum breaks with choppy breakbeat samples, deep sub bass, and dub reggae influences, perfect for fast-paced urban chase scenes or extreme sports montages"
+}
 # Global model storage
 model_cache = {}
 model_lock = threading.Lock()
                 model_cache['stable_audio_device'])
 @spaces.GPU(duration=12)
+def generate_stable_audio_loop(prompt, loop_type, bpm, bars, steps, cfg_scale, seed=-1):
     """Generate a BPM-aware loop using stable-audio-open-small"""
     try:
         total_start = time.time()
         seconds_per_bar = seconds_per_beat * 4  # 4/4 time
         target_loop_duration = seconds_per_bar * bars
         # Enhance prompt based on loop type and BPM - minimal modification
         if loop_type == "drums":
             enhanced_prompt = f"{prompt} {bpm}bpm"
         print(f"🎵 Generating {loop_type} loop:")
         print(f"   Enhanced prompt: {enhanced_prompt}")
         print(f"   Target duration: {target_loop_duration:.2f}s ({bars} bars at {bpm}bpm)")
+        print(f"   Steps: {steps}, CFG Scale: {cfg_scale}")
         print(f"   Seed: {seed}")
         # Prepare conditioning
         # Generation timing
         generation_start = time.time()
         # Clear GPU cache once before generation (not after)
         # if device == "cuda":
         #     torch.cuda.empty_cache()
         with torch.cuda.amp.autocast(enabled=(device == "cuda")):
             output = generate_diffusion_cond(
                 model,
+                steps=steps,  # User-configurable steps
+                cfg_scale=cfg_scale,  # User-configurable CFG scale
                 conditioning=conditioning,
                 negative_conditioning=negative_conditioning,
                 sample_size=config["sample_size"],
         print(f"   Total: {total_time:.2f}s")
         print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
+        return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s (steps: {steps}, cfg: {cfg_scale})"
     except Exception as e:
         print(f"❌ Generation error: {str(e)}")
             return bars
     return 1
+def update_transform_prompt(variation_choice):
+    """Update the transformation prompt based on variation selection"""
+    if variation_choice == "custom":
+        return gr.update(value="", placeholder="enter your custom transformation prompt", interactive=True)
+    elif variation_choice in MELODYFLOW_VARIATIONS:
+        return gr.update(value=MELODYFLOW_VARIATIONS[variation_choice], interactive=True)
+    else:
+        return gr.update(value="", placeholder="select a variation or enter custom prompt", interactive=True)
 # ========== GRADIO INTERFACE ==========
 with gr.Blocks(title="stable-melodyflow") as iface:
         - bpm-aware generation ensures perfect sync between loops (most the time lol)
         - negative prompting separates drums from instruments (most the time)
         - smart bar calculation optimizes loop length for the BPM
+        - preset transformation styles for braindead ease of use
         """)
     # ========== GLOBAL CONTROLS ==========
             info="prompt applied to either loop. make it more drum/instrument specific for best results"
         )
+    with gr.Row():
+        generation_steps = gr.Slider(
+            label="generation steps",
+            minimum=4,
+            maximum=16,
+            step=1,
+            value=8,
+            info="more steps = higher quality but slower generation"
+        )
+        cfg_scale = gr.Slider(
+            label="cfg scale",
+            minimum=0.5,
+            maximum=2.0,
+            step=0.1,
+            value=1.0,
+            info="higher values = more prompt adherence but potentially less natural"
+        )
     # Auto-suggest optimal bars based on BPM
     def update_suggested_bars(bpm):
         optimal = calculate_optimal_bars(bpm)
     with gr.Row():
         with gr.Column():
+            # Variation dropdown
+            variation_choice = gr.Dropdown(
+                label="transformation style preset",
+                choices=["custom"] + list(MELODYFLOW_VARIATIONS.keys()),
+                value="custom",
+                info="select a preset style or choose 'custom' for your own prompt"
+            )
             transform_prompt = gr.Textbox(
                 label="transformation prompt",
+                value="",
+                placeholder="enter your custom transformation prompt",
+                lines=3,
+                info="describes the style transformation to apply"
             )
         with gr.Column():
     # ========== EVENT HANDLERS ==========
+    # Update transform prompt when variation is selected
+    variation_choice.change(
+        update_transform_prompt,
+        inputs=[variation_choice],
+        outputs=[transform_prompt]
+    )
     # Generate drums
     generate_drums_btn.click(
         generate_stable_audio_loop,
+        inputs=[base_prompt, gr.State("drums"), global_bpm, global_bars, generation_steps, cfg_scale, drums_seed],
         outputs=[drums_audio, drums_status]
     )
     # Generate instruments
     generate_instruments_btn.click(
         generate_stable_audio_loop,
+        inputs=[base_prompt, gr.State("instruments"), global_bpm, global_bars, generation_steps, cfg_scale, instruments_seed],
         outputs=[instruments_audio, instruments_status]
     )
         inputs=[combined_audio, transform_prompt, transform_solver, transform_flowstep],
         outputs=[transformed_audio, transform_status]
     )
 if __name__ == "__main__":
     iface.launch()