Spaces:
Runtime error
Runtime error
fix for latent diffusion
Browse files
audiodiffusion/__init__.py
CHANGED
@@ -10,7 +10,7 @@ from diffusers import (DiffusionPipeline, DDPMPipeline, UNet2DConditionModel,
|
|
10 |
|
11 |
from .mel import Mel
|
12 |
|
13 |
-
VERSION = "1.2.
|
14 |
|
15 |
|
16 |
class AudioDiffusion:
|
@@ -199,8 +199,11 @@ class AudioDiffusionPipeline(DiffusionPipeline):
|
|
199 |
self.scheduler.set_timesteps(steps)
|
200 |
step_generator = step_generator or generator
|
201 |
mask = None
|
|
|
|
|
|
|
202 |
images = noise = torch.randn(
|
203 |
-
(batch_size, self.unet.in_channels
|
204 |
generator=generator)
|
205 |
|
206 |
if audio_file is not None or raw_audio is not None:
|
@@ -223,7 +226,9 @@ class AudioDiffusionPipeline(DiffusionPipeline):
|
|
223 |
torch.tensor(input_images[:, np.newaxis, np.newaxis, :]),
|
224 |
noise, torch.tensor(steps - start_step))
|
225 |
|
226 |
-
pixels_per_second = (
|
|
|
|
|
227 |
mask_start = int(mask_start_secs * pixels_per_second)
|
228 |
mask_end = int(mask_end_secs * pixels_per_second)
|
229 |
mask = self.scheduler.add_noise(
|
|
|
10 |
|
11 |
from .mel import Mel
|
12 |
|
13 |
+
VERSION = "1.2.2"
|
14 |
|
15 |
|
16 |
class AudioDiffusion:
|
|
|
199 |
self.scheduler.set_timesteps(steps)
|
200 |
step_generator = step_generator or generator
|
201 |
mask = None
|
202 |
+
# For backwards compatibility
|
203 |
+
if type(self.unet.sample_size) == int:
|
204 |
+
self.unet.sample_size = (self.unet.sample_size, self.unet.sample_size)
|
205 |
images = noise = torch.randn(
|
206 |
+
(batch_size, self.unet.in_channels) + self.unet.sample_size,
|
207 |
generator=generator)
|
208 |
|
209 |
if audio_file is not None or raw_audio is not None:
|
|
|
226 |
torch.tensor(input_images[:, np.newaxis, np.newaxis, :]),
|
227 |
noise, torch.tensor(steps - start_step))
|
228 |
|
229 |
+
pixels_per_second = (self.unet.sample_size[1] *
|
230 |
+
mel.get_sample_rate() / mel.x_res /
|
231 |
+
mel.hop_length)
|
232 |
mask_start = int(mask_start_secs * pixels_per_second)
|
233 |
mask_end = int(mask_end_secs * pixels_per_second)
|
234 |
mask = self.scheduler.add_noise(
|