swap to gradio 4.44 & add adaptive duration
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🎧
 colorFrom: indigo
 colorTo: gray
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
 license: cc-by-sa-4.0
@@ -12,4 +12,4 @@ short_description: Edit audios with text prompts
 ---
 
 The 30-second limit was introduced to ensure that queue wait times remain reasonable, especially when there are a lot of users.
-For that reason pull-requests that change this limit will not be merged. Please clone or duplicate the space to work locally without limits.
+For that reason pull-requests that change this limit will not be merged. Please clone or duplicate the space to work locally without limits.
app.py
CHANGED
@@ -73,7 +73,31 @@ def sample(ldm_stable, zs, wts, steps, prompt_tar, tstart, cfg_scale_tar):  # ,
 
     return (16000, audio.squeeze().cpu().numpy())
 
-
+def get_duration(input_audio, model_id: str, do_inversion: bool,
+                 wts: Optional[torch.Tensor], zs: Optional[torch.Tensor],
+                 saved_inv_model: str, source_prompt="", target_prompt="",
+                 steps=200, cfg_scale_src=3.5, cfg_scale_tar=12, t_start=45, randomize_seed=True):
+    if model_id == LDM2:
+        factor = 0.8
+    elif model_id == LDM2_LARGE:
+        factor = 1.5
+    else:  # MUSIC
+        factor = 1
+
+    mult = 0
+    if do_inversion or randomize_seed:
+        mult = steps
+
+    if input_audio is None:
+        raise gr.Error('Input audio missing!')
+    duration = min(utils.get_duration(input_audio), 30)
+
+    time_per_iter_of_full = factor * ((t_start / 100 * steps) * 2 + mult) * 0.2
+    print('expected time:', time_per_iter_of_full / 30 * duration)
+    return time_per_iter_of_full / 30 * duration
+
+
+@spaces.GPU(duration=get_duration)
 def edit(
     # cache_dir,
     input_audio,
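This is the "adaptive duration" part of the commit: when `@spaces.GPU(duration=...)` is given a callable instead of a fixed number, ZeroGPU invokes it with the same arguments as the decorated function and requests that many seconds of GPU time, so short clips no longer reserve a full worst-case slot. A worked example of the heuristic with assumed inputs (the MUSIC checkpoint, the signature's defaults, and a hypothetical 10-second clip):

```python
# Worked example of the duration heuristic above. Inputs are assumptions:
# MUSIC checkpoint (factor = 1), defaults steps = 200 and t_start = 45,
# inversion enabled, and a 10-second input clip.
factor = 1        # MUSIC model
steps = 200       # diffusion steps
t_start = 45      # percent of the schedule where editing starts
mult = steps      # do_inversion (or randomize_seed) adds a full inversion pass
duration = 10     # clip length in seconds (clamped to 30 upstream)

# The edit runs roughly (t_start/100 * steps) iterations twice (forward and
# reverse), plus the inversion pass; 0.2 s is the assumed per-iteration cost
# at the full 30-second clip length, scaled linearly by actual duration.
time_for_full_clip = factor * ((t_start / 100 * steps) * 2 + mult) * 0.2
print(time_for_full_clip)                  # 76.0 s for a 30-second clip
print(time_for_full_clip / 30 * duration)  # ~25.3 s requested for 10 s of audio
```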
requirements.txt
CHANGED
@@ -1,4 +1,5 @@
 torch
+numpy<2
 torchaudio
 diffusers
 accelerate
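The `numpy<2` pin is not explained in the commit; a plausible reading is that it guards against NumPy 2.0's ABI break, which can make older compiled wheels fail at import time. An illustrative sanity check, not part of the commit:

```python
# Illustrative check (assumption, not from the commit): fail fast if the
# environment resolved to NumPy 2.x despite the `numpy<2` pin.
import numpy as np

major = int(np.__version__.split(".")[0])
assert major < 2, f"NumPy {np.__version__} installed; compiled deps may break"
```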
utils.py
CHANGED
@@ -2,6 +2,7 @@ import numpy as np
 import torch
 from typing import Optional, List, Tuple, NamedTuple, Union
 from models import PipelineWrapper
+from audioldm.utils import get_duration
 
 
 class PromptEmbeddings(NamedTuple):
@@ -16,7 +17,7 @@ def load_audio(audio_path: Union[str, np.array], fn_STFT, left: int = 0, right:
     import audioldm
     import audioldm.audio
 
-    duration = min(
+    duration = min(get_duration(audio_path), 30)
 
     mel, _, _ = audioldm.audio.wav_to_fbank(audio_path, target_length=int(duration * 102.4), fn_STFT=fn_STFT)
     mel = mel.unsqueeze(0)
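With this change, `load_audio` probes the real clip length via `audioldm.utils.get_duration` and clamps it to 30 seconds before computing the mel target length, rather than assuming a fixed duration. If you'd rather not reach into `audioldm`'s internals, a hypothetical stand-in (name and approach assumed, not from the commit) can read the same information from the file header with torchaudio:

```python
# Hypothetical replacement for audioldm.utils.get_duration (assumed behavior):
# read container metadata with torchaudio instead of decoding the whole clip.
import torchaudio

def probe_duration(audio_path: str) -> float:
    """Return clip length in seconds from the file header."""
    info = torchaudio.info(audio_path)
    return info.num_frames / info.sample_rate

# Usage mirroring the patched line in load_audio:
# duration = min(probe_duration(audio_path), 30)
```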