Spaces:
Running
on
Zero
Running
on
Zero
ButterCream
committed on
Commit
·
417a076
1
Parent(s):
a835dc1
readd lain fix
Browse files
- app.py +12 -5
- requirements.txt +1 -2
app.py
CHANGED
@@ -5,13 +5,14 @@ import re
|
|
5 |
import numpy as np
|
6 |
from scipy.io.wavfile import write
|
7 |
import nltk
|
8 |
-
from VoPho.engine import Phonemizer
|
9 |
|
10 |
nltk.download('punkt')
|
11 |
from nltk.tokenize import word_tokenize
|
12 |
|
13 |
import torch
|
14 |
|
|
|
|
|
15 |
INTRO = """
|
16 |
<style>
|
17 |
|
@@ -93,6 +94,13 @@ theme = gr.themes.Soft(
|
|
93 |
block_background_fill='*neutral_50'
|
94 |
)
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
98 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
@@ -165,12 +173,11 @@ def split_and_recombine_text(text, desired_length=200, max_length=300):
|
|
165 |
|
166 |
return rv
|
167 |
|
168 |
-
engine = Phonemizer()
|
169 |
|
170 |
def text_to_phonemes(text):
|
171 |
text = text.strip()
|
172 |
print("Text before phonemization: ", text)
|
173 |
-
ps =
|
174 |
print("Text after phonemization: ", ps)
|
175 |
ps = word_tokenize(ps[0])
|
176 |
ps = ' '.join(ps)
|
@@ -211,7 +218,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
|
|
211 |
thresh = np.percentile(np.abs(synthaud), 95)
|
212 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
213 |
lead_percent = 0.008
|
214 |
-
trail_percent = 0.
|
215 |
|
216 |
|
217 |
# Leading artefact removal
|
@@ -252,7 +259,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
|
|
252 |
|
253 |
other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
|
254 |
if torch.cuda.is_available():
|
255 |
-
other_tts.
|
256 |
else:
|
257 |
other_tts.device = "cpu"
|
258 |
|
|
|
5 |
import numpy as np
|
6 |
from scipy.io.wavfile import write
|
7 |
import nltk
|
|
|
8 |
|
9 |
nltk.download('punkt')
|
10 |
from nltk.tokenize import word_tokenize
|
11 |
|
12 |
import torch
|
13 |
|
14 |
+
import phonemizer # en-us
|
15 |
+
|
16 |
INTRO = """
|
17 |
<style>
|
18 |
|
|
|
94 |
block_background_fill='*neutral_50'
|
95 |
)
|
96 |
|
97 |
+
# eventually swap to something else
|
98 |
+
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us',
|
99 |
+
preserve_punctuation=True,
|
100 |
+
with_stress=True,
|
101 |
+
language_switch="remove-flags",
|
102 |
+
tie=False)
|
103 |
+
|
104 |
|
105 |
def split_and_recombine_text(text, desired_length=200, max_length=300):
|
106 |
"""Split text it into chunks of a desired length trying to keep sentences intact."""
|
|
|
173 |
|
174 |
return rv
|
175 |
|
|
|
176 |
|
177 |
def text_to_phonemes(text):
|
178 |
text = text.strip()
|
179 |
print("Text before phonemization: ", text)
|
180 |
+
ps = global_phonemizer.phonemize([text])
|
181 |
print("Text after phonemization: ", ps)
|
182 |
ps = word_tokenize(ps[0])
|
183 |
ps = ' '.join(ps)
|
|
|
218 |
thresh = np.percentile(np.abs(synthaud), 95)
|
219 |
CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
|
220 |
lead_percent = 0.008
|
221 |
+
trail_percent = 0.009
|
222 |
|
223 |
|
224 |
# Leading artefact removal
|
|
|
259 |
|
260 |
other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
|
261 |
if torch.cuda.is_available():
|
262 |
+
other_tts.device = "cuda"
|
263 |
else:
|
264 |
other_tts.device = "cpu"
|
265 |
|
requirements.txt
CHANGED
@@ -23,5 +23,4 @@ gradio
|
|
23 |
spaces
|
24 |
gruut
|
25 |
txtsplit
|
26 |
-
scipy
|
27 |
-
VoPho
|
|
|
23 |
spaces
|
24 |
gruut
|
25 |
txtsplit
|
26 |
+
scipy
|
|