Spaces:
Running
on
Zero
Running
on
Zero
ButterCream
committed on
Commit
·
b07d516
1
Parent(s):
417a076
upgrade to VoPho - resolved all issues
Browse files
- app.py +5 -18
- requirements.txt +4 -3
app.py
CHANGED
@@ -5,14 +5,10 @@ import re
|
|
5 |
import numpy as np
|
6 |
from scipy.io.wavfile import write
|
7 |
import nltk
|
8 |
-
|
9 |
-
nltk.download('punkt')
|
10 |
-
from nltk.tokenize import word_tokenize
|
11 |
|
12 |
import torch
|
13 |
|
14 |
-
import phonemizer # en-us
|
15 |
-
|
16 |
INTRO = """
|
17 |
<style>
|
18 |
|
@@ -94,13 +90,6 @@ theme = gr.themes.Soft(
|
|
94 |
block_background_fill='*neutral_50'
|
95 |
)
|
96 |
|
97 |
-
# eventually swap to something else
|
98 |
-
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us',
|
99 |
-
preserve_punctuation=True,
|
100 |
-
with_stress=True,
|
101 |
-
language_switch="remove-flags",
|
102 |
-
tie=False)
|
103 |
-
|
104 |
|
105 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
106 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
@@ -173,15 +162,13 @@ def split_and_recombine_text(text, desired_length=200, max_length=300):
|
|
173 |
|
174 |
return rv
|
175 |
|
|
|
176 |
|
177 |
def text_to_phonemes(text):
|
178 |
text = text.strip()
|
179 |
print("Text before phonemization: ", text)
|
180 |
-
ps =
|
181 |
print("Text after phonemization: ", ps)
|
182 |
-
ps = word_tokenize(ps[0])
|
183 |
-
ps = ' '.join(ps)
|
184 |
-
print("Final text after tokenization: ", ps)
|
185 |
return ps
|
186 |
|
187 |
|
@@ -218,7 +205,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
|
|
218 |
thresh = np.percentile(np.abs(synthaud), 95)
|
219 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
220 |
lead_percent = 0.008
|
221 |
-
trail_percent = 0.
|
222 |
|
223 |
|
224 |
# Leading artefact removal
|
@@ -295,4 +282,4 @@ with gr.Blocks(theme=theme, js=js_func) as clone:
|
|
295 |
|
296 |
if __name__ == "__main__":
|
297 |
# demo.queue(api_open=False, max_size=15).launch(show_api=False)
|
298 |
-
clone.queue(api_open=False, max_size=15).launch(show_api=False)
|
|
|
5 |
import numpy as np
|
6 |
from scipy.io.wavfile import write
|
7 |
import nltk
|
8 |
+
from VoPho.engine import Phonemizer
|
|
|
|
|
9 |
|
10 |
import torch
|
11 |
|
|
|
|
|
12 |
INTRO = """
|
13 |
<style>
|
14 |
|
|
|
90 |
block_background_fill='*neutral_50'
|
91 |
)
|
92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
95 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
|
|
162 |
|
163 |
return rv
|
164 |
|
165 |
+
engine = Phonemizer()
|
166 |
|
167 |
def text_to_phonemes(text):
|
168 |
text = text.strip()
|
169 |
print("Text before phonemization: ", text)
|
170 |
+
ps = engine.phonemize(text)
|
171 |
print("Text after phonemization: ", ps)
|
|
|
|
|
|
|
172 |
return ps
|
173 |
|
174 |
|
|
|
205 |
thresh = np.percentile(np.abs(synthaud), 95)
|
206 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
207 |
lead_percent = 0.008
|
208 |
+
trail_percent = 0.0085
|
209 |
|
210 |
|
211 |
# Leading artefact removal
|
|
|
282 |
|
283 |
if __name__ == "__main__":
|
284 |
# demo.queue(api_open=False, max_size=15).launch(show_api=False)
|
285 |
+
clone.queue(api_open=False, max_size=15).launch(show_api=False)
|
requirements.txt
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
SoundFile
|
2 |
-
torchaudio==2.
|
3 |
munch
|
4 |
-
torch==2.
|
5 |
pydub
|
6 |
pyyaml
|
7 |
librosa
|
@@ -23,4 +23,5 @@ gradio
|
|
23 |
spaces
|
24 |
gruut
|
25 |
txtsplit
|
26 |
-
scipy
|
|
|
|
1 |
SoundFile
|
2 |
+
torchaudio==2.2.0
|
3 |
munch
|
4 |
+
torch==2.2.0
|
5 |
pydub
|
6 |
pyyaml
|
7 |
librosa
|
|
|
23 |
spaces
|
24 |
gruut
|
25 |
txtsplit
|
26 |
+
scipy
|
27 |
+
VoPho==0.0.8
|