thecollabagepatch committed on
Commit
11f5aeb
·
1 Parent(s): cfd186d

cfg and steps params added

Browse files
Files changed (1) hide show
  1. app.py +107 -25
app.py CHANGED
@@ -19,6 +19,58 @@ from stable_audio_tools.inference.generation import generate_diffusion_cond
19
  from gradio_client import Client, handle_file
20
  from contextlib import contextmanager
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # Global model storage
23
  model_cache = {}
24
  model_lock = threading.Lock()
@@ -90,7 +142,7 @@ def load_stable_audio_model():
90
  model_cache['stable_audio_device'])
91
 
92
  @spaces.GPU(duration=12)
93
- def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
94
  """Generate a BPM-aware loop using stable-audio-open-small"""
95
  try:
96
  total_start = time.time()
@@ -105,7 +157,6 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
105
  seconds_per_bar = seconds_per_beat * 4 # 4/4 time
106
  target_loop_duration = seconds_per_bar * bars
107
 
108
-
109
  # Enhance prompt based on loop type and BPM - minimal modification
110
  if loop_type == "drums":
111
  enhanced_prompt = f"{prompt} {bpm}bpm"
@@ -127,6 +178,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
127
  print(f"🎵 Generating {loop_type} loop:")
128
  print(f" Enhanced prompt: {enhanced_prompt}")
129
  print(f" Target duration: {target_loop_duration:.2f}s ({bars} bars at {bpm}bpm)")
 
130
  print(f" Seed: {seed}")
131
 
132
  # Prepare conditioning
@@ -145,7 +197,6 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
145
  # Generation timing
146
  generation_start = time.time()
147
 
148
- # Removed aggressive resource cleanup wrapper
149
  # Clear GPU cache once before generation (not after)
150
  # if device == "cuda":
151
  # torch.cuda.empty_cache()
@@ -153,8 +204,8 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
153
  with torch.cuda.amp.autocast(enabled=(device == "cuda")):
154
  output = generate_diffusion_cond(
155
  model,
156
- steps=8, # Fast generation
157
- cfg_scale=1.0, # Good balance for loops
158
  conditioning=conditioning,
159
  negative_conditioning=negative_conditioning,
160
  sample_size=config["sample_size"],
@@ -203,7 +254,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
203
  print(f" Total: {total_time:.2f}s")
204
  print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
205
 
206
- return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s"
207
 
208
  except Exception as e:
209
  print(f"❌ Generation error: {str(e)}")
@@ -340,6 +391,15 @@ def calculate_optimal_bars(bpm):
340
  return bars
341
  return 1
342
 
 
 
 
 
 
 
 
 
 
343
  # ========== GRADIO INTERFACE ==========
344
 
345
  with gr.Blocks(title="stable-melodyflow") as iface:
@@ -398,6 +458,7 @@ with gr.Blocks(title="stable-melodyflow") as iface:
398
  - bpm-aware generation ensures perfect sync between loops (most the time lol)
399
  - negative prompting separates drums from instruments (most the time)
400
  - smart bar calculation optimizes loop length for the BPM
 
401
  """)
402
 
403
  # ========== GLOBAL CONTROLS ==========
@@ -425,6 +486,25 @@ with gr.Blocks(title="stable-melodyflow") as iface:
425
  info="prompt applied to either loop. make it more drum/instrument specific for best results"
426
  )
427
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  # Auto-suggest optimal bars based on BPM
429
  def update_suggested_bars(bpm):
430
  optimal = calculate_optimal_bars(bpm)
@@ -475,11 +555,20 @@ with gr.Blocks(title="stable-melodyflow") as iface:
475
 
476
  with gr.Row():
477
  with gr.Column():
 
 
 
 
 
 
 
 
478
  transform_prompt = gr.Textbox(
479
  label="transformation prompt",
480
- value="aggressive industrial techno with distorted sounds",
481
- placeholder="describe the style of transformation",
482
- lines=2
 
483
  )
484
 
485
  with gr.Column():
@@ -504,17 +593,24 @@ with gr.Blocks(title="stable-melodyflow") as iface:
504
 
505
  # ========== EVENT HANDLERS ==========
506
 
 
 
 
 
 
 
 
507
  # Generate drums
508
  generate_drums_btn.click(
509
  generate_stable_audio_loop,
510
- inputs=[base_prompt, gr.State("drums"), global_bpm, global_bars, drums_seed],
511
  outputs=[drums_audio, drums_status]
512
  )
513
 
514
  # Generate instruments
515
  generate_instruments_btn.click(
516
  generate_stable_audio_loop,
517
- inputs=[base_prompt, gr.State("instruments"), global_bpm, global_bars, instruments_seed],
518
  outputs=[instruments_audio, instruments_status]
519
  )
520
 
@@ -531,20 +627,6 @@ with gr.Blocks(title="stable-melodyflow") as iface:
531
  inputs=[combined_audio, transform_prompt, transform_solver, transform_flowstep],
532
  outputs=[transformed_audio, transform_status]
533
  )
534
-
535
- # # ========== EXAMPLES ==========
536
- # gr.Markdown("## 🎯 Example Workflows")
537
-
538
- # examples = gr.Examples(
539
- # examples=[
540
- # ["techno", 128, 4, "aggressive industrial techno"],
541
- # ["jazz", 110, 2, "smooth lo-fi jazz with vinyl crackle"],
542
- # ["ambient", 90, 8, "ethereal ambient soundscape"],
543
- # ["hip-hop", 100, 4, "classic boom bap hip-hop"],
544
- # ["drum and bass", 140, 4, "liquid drum and bass"],
545
- # ],
546
- # inputs=[base_prompt, global_bpm, global_bars, transform_prompt],
547
- # )
548
 
549
  if __name__ == "__main__":
550
  iface.launch()
 
19
  from gradio_client import Client, handle_file
20
  from contextlib import contextmanager
21
 
22
+ # MelodyFlow Variations - extracted from variations.py
23
+ MELODYFLOW_VARIATIONS = {
24
+ # Acoustic Instruments
25
+ 'accordion_folk': "Lively accordion music with a European folk feeling, perfect for a travel documentary about traditional culture and street performances in Paris",
26
+ 'banjo_bluegrass': "Authentic bluegrass banjo band performance with rich picking patterns, ideal for a heartfelt documentary about American rural life and traditional crafts",
27
+ 'piano_classical': "Expressive classical piano performance with dynamic range and emotional depth, ideal for a luxury brand commercial",
28
+ 'celtic': "Traditional Celtic arrangement with fiddle and flute, perfect for a documentary about Ireland's stunning landscapes and ancient traditions",
29
+ 'strings_quartet': "Elegant string quartet arrangement with rich harmonies and expressive dynamics, perfect for wedding ceremony music",
30
+
31
+ # Synthesizer Variations
32
+ 'synth_retro': "1980s style synthesizer melody with warm analog pads and arpeggios, perfect for a nostalgic sci-fi movie soundtrack",
33
+ 'synth_modern': "Modern electronic production with crisp digital synthesizer arpeggios and vocoder effects, ideal for a tech product launch video",
34
+ 'synth_ambient': "Atmospheric synthesizer pads with reverb and delay, perfect for a meditation app or wellness commercial",
35
+ 'synth_edm': "High-energy EDM synth saw leads with sidechain compression, pitch bends, perfect for sports highlights or action sequences",
36
+
37
+ # Band Arrangements
38
+ 'rock_band': "Full rock band arrangement with electric guitars, bass, and drums, perfect for an action movie trailer",
39
+
40
+ # Hybrid/Special
41
+ 'cinematic_epic': "Epic orchestral arrangement with modern hybrid elements, synthesizers, and percussion, perfect for movie trailers",
42
+ 'lofi_chill': "Lo-fi hip hop style with vinyl crackle, mellow piano, and tape saturation, perfect for study or focus playlists",
43
+ 'synth_bass': "Deep analog synthesizer bassline with modern production and subtle modulation, perfect for electronic music production",
44
+ 'retro_rpg': "16-bit era JRPG soundtrack with bright melodic synthesizers, orchestral elements, and adventurous themes, perfect for a fantasy video game battle scene or overworld exploration",
45
+ 'steel_drums': "Vibrant Caribbean steel drum ensemble with tropical percussion and uplifting melodies, perfect for a beach resort commercial or travel documentary",
46
+ 'chiptune': "8-bit video game soundtrack with arpeggiated melodies and classic NES-style square waves, perfect for a retro platformer or action game",
47
+ 'gamelan_fusion': "Indonesian gamelan ensemble with metallic percussion, gongs, and ethereal textures, perfect for a meditation app or spiritual documentary",
48
+ 'music_box': "Delicate music box melody with gentle bell tones and ethereal ambiance, perfect for a children's lullaby or magical fantasy scene",
49
+
50
+ # Hip Hop / Trap Percussion
51
+ 'trap_808': "808 bass",
52
+ 'lo_fi_drums': "lofi hiphop percussion",
53
+ 'boom_bap': "Classic 90s boom bap hip hop drums with punchy kicks, crisp snares, and jazz sample chops, perfect for documentary footage of urban street scenes and skateboarding",
54
+ 'percussion_ensemble': "Rich percussive ensemble with djembe, congas, shakers, and tribal drums creating complex polyrhythms, perfect for nature documentaries about rainforests or ancient cultural rituals",
55
+
56
+ # Enhanced Electronic Music
57
+ 'future_bass': "Energetic future bass with filtered supersaws, pitch-bending lead synths, heavy sidechain, and chopped vocal samples, perfect for extreme sports highlights or uplifting motivational content",
58
+ 'synthwave_retro': "80s retrofuturistic synthwave with gated reverb drums, analog arpeggios, neon-bright lead synths and driving bass, perfect for cyberpunk-themed technology showcases or retro gaming montages",
59
+ 'melodic_techno': "Hypnotic melodic techno with pulsing bass, atmospheric pads, and evolving synthesizer sequences with subtle filter modulation, ideal for timelapse footage of urban nightscapes or architectural showcases",
60
+ 'dubstep_wobble': "Heavy dubstep with aggressive wobble bass, metallic synthesizers, distorted drops, and tension-building risers, perfect for action sequence transitions or gaming highlight reels",
61
+
62
+ # Glitchy Effects
63
+ 'glitch_hop': "Glitch hop with stuttering sample slices, bit-crushed percussion, granular synthesis textures and digital artifacts, perfect for technology malfunction scenes or data visualization animations",
64
+ 'digital_disruption': "Heavily glitched soundscape with digital artifacts, buffer errors, granular time stretching, and corrupted audio samples, ideal for cybersecurity themes or digital distortion transitions in tech presentations",
65
+ 'circuit_bent': "Circuit-bent toy sounds with unpredictable pitch shifts, broken electronic tones, and hardware malfunction artifacts, perfect for creative coding demonstrations or innovative technology exhibitions",
66
+
67
+ # Experimental Hybrids
68
+ 'orchestral_glitch': "Cinematic orchestral elements disrupted by digital glitches, granular textures, and temporal distortions, perfect for science fiction trailers or futuristic product reveals with contrasting classical and modern elements",
69
+ 'vapor_drums': "Vaporwave drum processing with extreme pitch and time manipulation, reverb-drenched samples, and retro commercial music elements, ideal for nostalgic internet culture documentaries or retrofuturistic art installations",
70
+ 'industrial_textures': "Harsh industrial soundscape with mechanical percussion, factory recordings, metallic impacts, and distorted synth drones, perfect for manufacturing process videos or dystopian urban environments",
71
+ 'jungle_breaks': "High-energy jungle drum breaks with choppy breakbeat samples, deep sub bass, and dub reggae influences, perfect for fast-paced urban chase scenes or extreme sports montages"
72
+ }
73
+
74
  # Global model storage
75
  model_cache = {}
76
  model_lock = threading.Lock()
 
142
  model_cache['stable_audio_device'])
143
 
144
  @spaces.GPU(duration=12)
145
+ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, steps, cfg_scale, seed=-1):
146
  """Generate a BPM-aware loop using stable-audio-open-small"""
147
  try:
148
  total_start = time.time()
 
157
  seconds_per_bar = seconds_per_beat * 4 # 4/4 time
158
  target_loop_duration = seconds_per_bar * bars
159
 
 
160
  # Enhance prompt based on loop type and BPM - minimal modification
161
  if loop_type == "drums":
162
  enhanced_prompt = f"{prompt} {bpm}bpm"
 
178
  print(f"🎵 Generating {loop_type} loop:")
179
  print(f" Enhanced prompt: {enhanced_prompt}")
180
  print(f" Target duration: {target_loop_duration:.2f}s ({bars} bars at {bpm}bpm)")
181
+ print(f" Steps: {steps}, CFG Scale: {cfg_scale}")
182
  print(f" Seed: {seed}")
183
 
184
  # Prepare conditioning
 
197
  # Generation timing
198
  generation_start = time.time()
199
 
 
200
  # Clear GPU cache once before generation (not after)
201
  # if device == "cuda":
202
  # torch.cuda.empty_cache()
 
204
  with torch.cuda.amp.autocast(enabled=(device == "cuda")):
205
  output = generate_diffusion_cond(
206
  model,
207
+ steps=steps, # User-configurable steps
208
+ cfg_scale=cfg_scale, # User-configurable CFG scale
209
  conditioning=conditioning,
210
  negative_conditioning=negative_conditioning,
211
  sample_size=config["sample_size"],
 
254
  print(f" Total: {total_time:.2f}s")
255
  print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
256
 
257
+ return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s (steps: {steps}, cfg: {cfg_scale})"
258
 
259
  except Exception as e:
260
  print(f"❌ Generation error: {str(e)}")
 
391
  return bars
392
  return 1
393
 
394
+ def update_transform_prompt(variation_choice):
395
+ """Update the transformation prompt based on variation selection"""
396
+ if variation_choice == "custom":
397
+ return gr.update(value="", placeholder="enter your custom transformation prompt", interactive=True)
398
+ elif variation_choice in MELODYFLOW_VARIATIONS:
399
+ return gr.update(value=MELODYFLOW_VARIATIONS[variation_choice], interactive=True)
400
+ else:
401
+ return gr.update(value="", placeholder="select a variation or enter custom prompt", interactive=True)
402
+
403
  # ========== GRADIO INTERFACE ==========
404
 
405
  with gr.Blocks(title="stable-melodyflow") as iface:
 
458
  - bpm-aware generation ensures perfect sync between loops (most the time lol)
459
  - negative prompting separates drums from instruments (most the time)
460
  - smart bar calculation optimizes loop length for the BPM
461
+ - preset transformation styles for braindead ease of use
462
  """)
463
 
464
  # ========== GLOBAL CONTROLS ==========
 
486
  info="prompt applied to either loop. make it more drum/instrument specific for best results"
487
  )
488
 
489
+ with gr.Row():
490
+ generation_steps = gr.Slider(
491
+ label="generation steps",
492
+ minimum=4,
493
+ maximum=16,
494
+ step=1,
495
+ value=8,
496
+ info="more steps = higher quality but slower generation"
497
+ )
498
+
499
+ cfg_scale = gr.Slider(
500
+ label="cfg scale",
501
+ minimum=0.5,
502
+ maximum=2.0,
503
+ step=0.1,
504
+ value=1.0,
505
+ info="higher values = more prompt adherence but potentially less natural"
506
+ )
507
+
508
  # Auto-suggest optimal bars based on BPM
509
  def update_suggested_bars(bpm):
510
  optimal = calculate_optimal_bars(bpm)
 
555
 
556
  with gr.Row():
557
  with gr.Column():
558
+ # Variation dropdown
559
+ variation_choice = gr.Dropdown(
560
+ label="transformation style preset",
561
+ choices=["custom"] + list(MELODYFLOW_VARIATIONS.keys()),
562
+ value="custom",
563
+ info="select a preset style or choose 'custom' for your own prompt"
564
+ )
565
+
566
  transform_prompt = gr.Textbox(
567
  label="transformation prompt",
568
+ value="",
569
+ placeholder="enter your custom transformation prompt",
570
+ lines=3,
571
+ info="describes the style transformation to apply"
572
  )
573
 
574
  with gr.Column():
 
593
 
594
  # ========== EVENT HANDLERS ==========
595
 
596
+ # Update transform prompt when variation is selected
597
+ variation_choice.change(
598
+ update_transform_prompt,
599
+ inputs=[variation_choice],
600
+ outputs=[transform_prompt]
601
+ )
602
+
603
  # Generate drums
604
  generate_drums_btn.click(
605
  generate_stable_audio_loop,
606
+ inputs=[base_prompt, gr.State("drums"), global_bpm, global_bars, generation_steps, cfg_scale, drums_seed],
607
  outputs=[drums_audio, drums_status]
608
  )
609
 
610
  # Generate instruments
611
  generate_instruments_btn.click(
612
  generate_stable_audio_loop,
613
+ inputs=[base_prompt, gr.State("instruments"), global_bpm, global_bars, generation_steps, cfg_scale, instruments_seed],
614
  outputs=[instruments_audio, instruments_status]
615
  )
616
 
 
627
  inputs=[combined_audio, transform_prompt, transform_solver, transform_flowstep],
628
  outputs=[transformed_audio, transform_status]
629
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
630
 
631
  if __name__ == "__main__":
632
  iface.launch()