Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
3e6fdea
1
Parent(s):
108943b
trying to solve model loading
Browse files
app.py
CHANGED
@@ -25,20 +25,21 @@ model_lock = threading.Lock()
|
|
25 |
|
26 |
@contextmanager
|
27 |
def resource_cleanup():
|
28 |
-
"""
|
29 |
try:
|
30 |
yield
|
31 |
finally:
|
|
|
32 |
if torch.cuda.is_available():
|
33 |
torch.cuda.synchronize()
|
34 |
-
|
35 |
-
gc.collect()
|
36 |
|
37 |
def load_stable_audio_model():
|
38 |
"""Load stable-audio-open-small model if not already loaded."""
|
39 |
with model_lock:
|
40 |
if 'stable_audio_model' not in model_cache:
|
41 |
print("🔄 Loading stable-audio-open-small model...")
|
|
|
42 |
|
43 |
# Authenticate with HF
|
44 |
hf_token = os.getenv('HF_TOKEN')
|
@@ -53,10 +54,36 @@ def load_stable_audio_model():
|
|
53 |
if device == "cuda":
|
54 |
model = model.half()
|
55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
model_cache['stable_audio_model'] = model
|
57 |
model_cache['stable_audio_config'] = config
|
58 |
model_cache['stable_audio_device'] = device
|
59 |
-
print(f"β
Stable Audio model
|
|
|
|
|
60 |
|
61 |
return (model_cache['stable_audio_model'],
|
62 |
model_cache['stable_audio_config'],
|
@@ -66,7 +93,12 @@ def load_stable_audio_model():
|
|
66 |
def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
67 |
"""Generate a BPM-aware loop using stable-audio-open-small"""
|
68 |
try:
|
|
|
|
|
|
|
|
|
69 |
model, config, device = load_stable_audio_model()
|
|
|
70 |
|
71 |
# Calculate loop duration based on BPM and bars
|
72 |
seconds_per_beat = 60.0 / bpm
|
@@ -95,6 +127,7 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
|
95 |
print(f" Seed: {seed}")
|
96 |
|
97 |
# Prepare conditioning
|
|
|
98 |
conditioning = [{
|
99 |
"prompt": enhanced_prompt,
|
100 |
"seconds_total": 12 # Model generates 12s max
|
@@ -104,54 +137,70 @@ def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
|
104 |
"prompt": negative_prompt,
|
105 |
"seconds_total": 12
|
106 |
}]
|
|
|
107 |
|
108 |
-
|
|
|
109 |
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
except Exception as e:
|
157 |
print(f"❌ Generation error: {str(e)}")
|
|
|
25 |
|
26 |
@contextmanager
|
27 |
def resource_cleanup():
|
28 |
+
"""Lightweight context manager - let zerogpu handle memory management"""
|
29 |
try:
|
30 |
yield
|
31 |
finally:
|
32 |
+
# Minimal cleanup - let zerogpu handle the heavy lifting
|
33 |
if torch.cuda.is_available():
|
34 |
torch.cuda.synchronize()
|
35 |
+
# Removed aggressive empty_cache() and gc.collect() calls
|
|
|
36 |
|
37 |
def load_stable_audio_model():
|
38 |
"""Load stable-audio-open-small model if not already loaded."""
|
39 |
with model_lock:
|
40 |
if 'stable_audio_model' not in model_cache:
|
41 |
print("🔄 Loading stable-audio-open-small model...")
|
42 |
+
load_start = time.time()
|
43 |
|
44 |
# Authenticate with HF
|
45 |
hf_token = os.getenv('HF_TOKEN')
|
|
|
54 |
if device == "cuda":
|
55 |
model = model.half()
|
56 |
|
57 |
+
load_time = time.time() - load_start
|
58 |
+
print(f"✅ Model loaded on {device} in {load_time:.2f}s")
|
59 |
+
|
60 |
+
# Aggressive model persistence - warm up with dummy generation
|
61 |
+
print("🔥 Warming up model...")
|
62 |
+
warmup_start = time.time()
|
63 |
+
try:
|
64 |
+
dummy_conditioning = [{"prompt": "test", "seconds_total": 12}]
|
65 |
+
with torch.no_grad():
|
66 |
+
_ = generate_diffusion_cond(
|
67 |
+
model,
|
68 |
+
steps=1, # Minimal steps for warmup
|
69 |
+
cfg_scale=1.0,
|
70 |
+
conditioning=dummy_conditioning,
|
71 |
+
sample_size=config["sample_size"],
|
72 |
+
sampler_type="pingpong",
|
73 |
+
device=device,
|
74 |
+
seed=42
|
75 |
+
)
|
76 |
+
warmup_time = time.time() - warmup_start
|
77 |
+
print(f"🔥 Model warmed up in {warmup_time:.2f}s")
|
78 |
+
except Exception as e:
|
79 |
+
print(f"⚠️ Warmup failed (but continuing): {e}")
|
80 |
+
|
81 |
model_cache['stable_audio_model'] = model
|
82 |
model_cache['stable_audio_config'] = config
|
83 |
model_cache['stable_audio_device'] = device
|
84 |
+
print(f"✅ Stable Audio model ready for fast generation!")
|
85 |
+
else:
|
86 |
+
print("♻️ Using cached model (should be fast!)")
|
87 |
|
88 |
return (model_cache['stable_audio_model'],
|
89 |
model_cache['stable_audio_config'],
|
|
|
93 |
def generate_stable_audio_loop(prompt, loop_type, bpm, bars, seed=-1):
|
94 |
"""Generate a BPM-aware loop using stable-audio-open-small"""
|
95 |
try:
|
96 |
+
total_start = time.time()
|
97 |
+
|
98 |
+
# Model loading timing
|
99 |
+
load_start = time.time()
|
100 |
model, config, device = load_stable_audio_model()
|
101 |
+
load_time = time.time() - load_start
|
102 |
|
103 |
# Calculate loop duration based on BPM and bars
|
104 |
seconds_per_beat = 60.0 / bpm
|
|
|
127 |
print(f" Seed: {seed}")
|
128 |
|
129 |
# Prepare conditioning
|
130 |
+
conditioning_start = time.time()
|
131 |
conditioning = [{
|
132 |
"prompt": enhanced_prompt,
|
133 |
"seconds_total": 12 # Model generates 12s max
|
|
|
137 |
"prompt": negative_prompt,
|
138 |
"seconds_total": 12
|
139 |
}]
|
140 |
+
conditioning_time = time.time() - conditioning_start
|
141 |
|
142 |
+
# Generation timing
|
143 |
+
generation_start = time.time()
|
144 |
|
145 |
+
# Removed aggressive resource cleanup wrapper
|
146 |
+
# Clear GPU cache once before generation (not after)
|
147 |
+
if device == "cuda":
|
148 |
+
torch.cuda.empty_cache()
|
149 |
+
|
150 |
+
with torch.cuda.amp.autocast(enabled=(device == "cuda")):
|
151 |
+
output = generate_diffusion_cond(
|
152 |
+
model,
|
153 |
+
steps=8, # Fast generation
|
154 |
+
cfg_scale=1.0, # Good balance for loops
|
155 |
+
conditioning=conditioning,
|
156 |
+
negative_conditioning=negative_conditioning,
|
157 |
+
sample_size=config["sample_size"],
|
158 |
+
sampler_type="pingpong",
|
159 |
+
device=device,
|
160 |
+
seed=seed
|
161 |
+
)
|
162 |
+
|
163 |
+
generation_time = time.time() - generation_start
|
164 |
+
|
165 |
+
# Post-processing timing
|
166 |
+
postproc_start = time.time()
|
167 |
+
|
168 |
+
# Post-process audio
|
169 |
+
output = rearrange(output, "b d n -> d (b n)") # (2, N) stereo
|
170 |
+
output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1)
|
171 |
+
|
172 |
+
# Extract the loop portion
|
173 |
+
sample_rate = config["sample_rate"]
|
174 |
+
loop_samples = int(target_loop_duration * sample_rate)
|
175 |
+
available_samples = output.shape[1]
|
176 |
+
|
177 |
+
if loop_samples > available_samples:
|
178 |
+
loop_samples = available_samples
|
179 |
+
actual_duration = available_samples / sample_rate
|
180 |
+
print(f"⚠️ Requested {target_loop_duration:.2f}s, got {actual_duration:.2f}s")
|
181 |
+
|
182 |
+
# Extract loop from beginning (cleanest beat alignment)
|
183 |
+
loop_output = output[:, :loop_samples]
|
184 |
+
loop_output_int16 = loop_output.mul(32767).to(torch.int16).cpu()
|
185 |
+
|
186 |
+
# Save to temporary file
|
187 |
+
loop_filename = f"loop_{loop_type}_{bpm}bpm_{bars}bars_{seed}.wav"
|
188 |
+
torchaudio.save(loop_filename, loop_output_int16, sample_rate)
|
189 |
+
|
190 |
+
postproc_time = time.time() - postproc_start
|
191 |
+
total_time = time.time() - total_start
|
192 |
+
actual_duration = loop_samples / sample_rate
|
193 |
+
|
194 |
+
# Detailed timing breakdown
|
195 |
+
print(f"⏱️ Timing breakdown:")
|
196 |
+
print(f" Model load: {load_time:.2f}s")
|
197 |
+
print(f" Conditioning: {conditioning_time:.3f}s")
|
198 |
+
print(f" Generation: {generation_time:.2f}s")
|
199 |
+
print(f" Post-processing: {postproc_time:.3f}s")
|
200 |
+
print(f" Total: {total_time:.2f}s")
|
201 |
+
print(f"✅ {loop_type.title()} loop: {actual_duration:.2f}s audio in {total_time:.2f}s")
|
202 |
+
|
203 |
+
return loop_filename, f"Generated {actual_duration:.2f}s {loop_type} loop at {bpm}bpm ({bars} bars) in {total_time:.2f}s"
|
204 |
|
205 |
except Exception as e:
|
206 |
print(f"❌ Generation error: {str(e)}")
|