Spaces:

thepatch
/

stable-melodyflow

Running on Zero

App Files Files Community

thecollabagepatch committed 1 day ago

Commit

3e6fdea

1 Parent(s): 108943b

trying to solve model loading

Browse files

Files changed (1) hide show

app.py +99 -50

app.py CHANGED Viewed

@@ -25,20 +25,21 @@ model_lock = threading.Lock()
 @contextmanager
 def resource_cleanup():
-    """Context manager to ensure proper cleanup of GPU resources."""
     try:
         yield
     finally:
         if torch.cuda.is_available():
             torch.cuda.synchronize()
-            torch.cuda.empty_cache()
-        gc.collect()
 def load_stable_audio_model():
     """Load stable-audio-open-small model if not already loaded."""
     with model_lock:
         if 'stable_audio_model' not in model_cache:
             print("🔄 Loading stable-audio-open-small model...")
             # Authenticate with HF
             hf_token = os.getenv('HF_TOKEN')
@@ -53,10 +54,36 @@ def load_stable_audio_model():
             if device == "cuda":
                 model = model.half()
             model_cache['stable_audio_model'] = model
             model_cache['stable_audio_config'] = config
             model_cache['stable_audio_device'] = device
-            print(f"✅ Stable Audio model loaded on {device}")
         return (model_cache['stable_audio_model'],
                 model_cache['stable_audio_config'],
@@ -66,7 +93,12 @@ def load_stable_audio_model():
 def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
     """Generate a BPM-aware loop using stable-audio-open-small"""
     try:
         model, config, device = load_stable_audio_model()
         # Calculate loop duration based on BPM and bars
         seconds_per_beat = 60.0 / bpm
@@ -95,6 +127,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
         print(f"   Seed: {seed}")
         # Prepare conditioning
         conditioning = [{
             "prompt": enhanced_prompt,
             "seconds_total": 12  # Model generates 12s max
@@ -104,54 +137,70 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
             "prompt": negative_prompt,
             "seconds_total": 12
         }]
-        start_time = time.time()
-        with resource_cleanup():
-            if device == "cuda":
-                torch.cuda.empty_cache()
-            with torch.cuda.amp.autocast(enabled=(device == "cuda")):
-                output = generate_diffusion_cond(
-                    model,
-                    steps=8,  # Fast generation
-                    cfg_scale=1.0,  # Good balance for loops
-                    conditioning=conditioning,
-                    negative_conditioning=negative_conditioning,
-                    sample_size=config["sample_size"],
-                    sampler_type="pingpong",
-                    device=device,
-                    seed=seed
-                )
-            generation_time = time.time() - start_time
-            # Post-process audio
-            output = rearrange(output, "b d n -> d (b n)")  # (2, N) stereo
-            output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1)
-            # Extract the loop portion
-            sample_rate = config["sample_rate"]
-            loop_samples = int(target_loop_duration * sample_rate)
-            available_samples = output.shape[1]
-            if loop_samples > available_samples:
-                loop_samples = available_samples
-                actual_duration = available_samples / sample_rate
-                print(f"⚠️ Requested {target_loop_duration:.2f}s, got {actual_duration:.2f}s")
-            # Extract loop from beginning (cleanest beat alignment)
-            loop_output = output[:, :loop_samples]
-            loop_output_int16 = loop_output.mul(32767).to(torch.int16).cpu()
-            # Save to temporary file
-            loop_filename = f"loop_{loop_type}_{bpm}bpm_{bars}bars_{seed}.wav"
-            torchaudio.save(loop_filename, loop_output_int16, sample_rate)
-            actual_duration = loop_samples / sample_rate
-            print(f"✅ {loop_type.title()} loop generated: {actual_duration:.2f}s in {generation_time:.2f}s")
-            return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars)"
     except Exception as e:
         print(f"❌ Generation error: {str(e)}")

 @contextmanager
 def resource_cleanup():
+    """Lightweight context manager - let zerogpu handle memory management"""
     try:
         yield
     finally:
+        # Minimal cleanup - let zerogpu handle the heavy lifting
         if torch.cuda.is_available():
             torch.cuda.synchronize()
+        # Removed aggressive empty_cache() and gc.collect() calls
 def load_stable_audio_model():
     """Load stable-audio-open-small model if not already loaded."""
     with model_lock:
         if 'stable_audio_model' not in model_cache:
             print("🔄 Loading stable-audio-open-small model...")
+            load_start = time.time()
             # Authenticate with HF
             hf_token = os.getenv('HF_TOKEN')
             if device == "cuda":
                 model = model.half()
+            load_time = time.time() - load_start
+            print(f"✅ Model loaded on {device} in {load_time:.2f}s")
+            # Aggressive model persistence - warm up with dummy generation
+            print("🔥 Warming up model...")
+            warmup_start = time.time()
+            try:
+                dummy_conditioning = [{"prompt": "test", "seconds_total": 12}]
+                with torch.no_grad():
+                    _ = generate_diffusion_cond(
+                        model,
+                        steps=1,  # Minimal steps for warmup
+                        cfg_scale=1.0,
+                        conditioning=dummy_conditioning,
+                        sample_size=config["sample_size"],
+                        sampler_type="pingpong",
+                        device=device,
+                        seed=42
+                    )
+                warmup_time = time.time() - warmup_start
+                print(f"🔥 Model warmed up in {warmup_time:.2f}s")
+            except Exception as e:
+                print(f"⚠️ Warmup failed (but continuing): {e}")
             model_cache['stable_audio_model'] = model
             model_cache['stable_audio_config'] = config
             model_cache['stable_audio_device'] = device
+            print(f"✅ Stable Audio model ready for fast generation!")
+        else:
+            print("♻️ Using cached model (should be fast!)")
         return (model_cache['stable_audio_model'],
                 model_cache['stable_audio_config'],
 def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
     """Generate a BPM-aware loop using stable-audio-open-small"""
     try:
+        total_start = time.time()
+        # Model loading timing
+        load_start = time.time()
         model, config, device = load_stable_audio_model()
+        load_time = time.time() - load_start
         # Calculate loop duration based on BPM and bars
         seconds_per_beat = 60.0 / bpm
         print(f"   Seed: {seed}")
         # Prepare conditioning
+        conditioning_start = time.time()
         conditioning = [{
             "prompt": enhanced_prompt,
             "seconds_total": 12  # Model generates 12s max
             "prompt": negative_prompt,
             "seconds_total": 12
         }]
+        conditioning_time = time.time() - conditioning_start
+        # Generation timing
+        generation_start = time.time()
+        # Removed aggressive resource cleanup wrapper
+        # Clear GPU cache once before generation (not after)
+        if device == "cuda":
+            torch.cuda.empty_cache()
+        with torch.cuda.amp.autocast(enabled=(device == "cuda")):
+            output = generate_diffusion_cond(
+                model,
+                steps=8,  # Fast generation
+                cfg_scale=1.0,  # Good balance for loops
+                conditioning=conditioning,
+                negative_conditioning=negative_conditioning,
+                sample_size=config["sample_size"],
+                sampler_type="pingpong",
+                device=device,
+                seed=seed
+            )
+        generation_time = time.time() - generation_start
+        # Post-processing timing
+        postproc_start = time.time()
+        # Post-process audio
+        output = rearrange(output, "b d n -> d (b n)")  # (2, N) stereo
+        output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1)
+        # Extract the loop portion
+        sample_rate = config["sample_rate"]
+        loop_samples = int(target_loop_duration * sample_rate)
+        available_samples = output.shape[1]
+        if loop_samples > available_samples:
+            loop_samples = available_samples
+            actual_duration = available_samples / sample_rate
+            print(f"⚠️ Requested {target_loop_duration:.2f}s, got {actual_duration:.2f}s")
+        # Extract loop from beginning (cleanest beat alignment)
+        loop_output = output[:, :loop_samples]
+        loop_output_int16 = loop_output.mul(32767).to(torch.int16).cpu()
+        # Save to temporary file
+        loop_filename = f"loop_{loop_type}_{bpm}bpm_{bars}bars_{seed}.wav"
+        torchaudio.save(loop_filename, loop_output_int16, sample_rate)
+        postproc_time = time.time() - postproc_start
+        total_time = time.time() - total_start
+        actual_duration = loop_samples / sample_rate
+        # Detailed timing breakdown
+        print(f"⏱️ Timing breakdown:")
+        print(f"   Model load: {load_time:.2f}s")
+        print(f"   Conditioning: {conditioning_time:.3f}s")
+        print(f"   Generation: {generation_time:.2f}s")
+        print(f"   Post-processing: {postproc_time:.3f}s")
+        print(f"   Total: {total_time:.2f}s")
+        print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
+        return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s"
     except Exception as e:
         print(f"❌ Generation error: {str(e)}")