ButterCream committed on
Commit
417a076
·
1 Parent(s): a835dc1

readd lain fix

Browse files
Files changed (2) hide show
  1. app.py +12 -5
  2. requirements.txt +1 -2
app.py CHANGED
@@ -5,13 +5,14 @@ import re
5
  import numpy as np
6
  from scipy.io.wavfile import write
7
  import nltk
8
- from VoPho.engine import Phonemizer
9
 
10
  nltk.download('punkt')
11
  from nltk.tokenize import word_tokenize
12
 
13
  import torch
14
 
 
 
15
  INTRO = """
16
  <style>
17
 
@@ -93,6 +94,13 @@ theme = gr.themes.Soft(
93
  block_background_fill='*neutral_50'
94
  )
95
 
 
 
 
 
 
 
 
96
 
97
  def split_and_recombine_text(text, desired_length=200, max_length=300):
98
  """Split text it into chunks of a desired length trying to keep sentences intact."""
@@ -165,12 +173,11 @@ def split_and_recombine_text(text, desired_length=200, max_length=300):
165
 
166
  return rv
167
 
168
- engine = Phonemizer()
169
 
170
  def text_to_phonemes(text):
171
  text = text.strip()
172
  print("Text before phonemization: ", text)
173
- ps = engine.phonemize(text)
174
  print("Text after phonemization: ", ps)
175
  ps = word_tokenize(ps[0])
176
  ps = ' '.join(ps)
@@ -211,7 +218,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
211
  thresh = np.percentile(np.abs(synthaud), 95)
212
  CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
213
  lead_percent = 0.008
214
- trail_percent = 0.0085
215
 
216
 
217
  # Leading artefact removal
@@ -252,7 +259,7 @@ def generate(audio_path, ins, speed, alpha, beta, embedding, steps=200):
252
 
253
  other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
254
  if torch.cuda.is_available():
255
- other_tts.devuce = "cuda"
256
  else:
257
  other_tts.device = "cpu"
258
 
 
5
  import numpy as np
6
  from scipy.io.wavfile import write
7
  import nltk
 
8
 
9
  nltk.download('punkt')
10
  from nltk.tokenize import word_tokenize
11
 
12
  import torch
13
 
14
+ import phonemizer # en-us
15
+
16
  INTRO = """
17
  <style>
18
 
 
94
  block_background_fill='*neutral_50'
95
  )
96
 
97
+ # eventually swap to something else
98
+ global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us',
99
+ preserve_punctuation=True,
100
+ with_stress=True,
101
+ language_switch="remove-flags",
102
+ tie=False)
103
+
104
 
105
  def split_and_recombine_text(text, desired_length=200, max_length=300):
106
  """Split text it into chunks of a desired length trying to keep sentences intact."""
 
173
 
174
  return rv
175
 
 
176
 
177
  def text_to_phonemes(text):
178
  text = text.strip()
179
  print("Text before phonemization: ", text)
180
+ ps = global_phonemizer.phonemize([text])
181
  print("Text after phonemization: ", ps)
182
  ps = word_tokenize(ps[0])
183
  ps = ' '.join(ps)
 
218
  thresh = np.percentile(np.abs(synthaud), 95)
219
  CUT_SAMPLES = 20000 # max samples to cut, in practice only 4-6k are actually cut
220
  lead_percent = 0.008
221
+ trail_percent = 0.009
222
 
223
 
224
  # Leading artefact removal
 
259
 
260
  other_tts = tts.StyleTTS2(model_checkpoint_path='./epoch_2nd_00012.pth', config_path="models/config_ft.yml")
261
  if torch.cuda.is_available():
262
+ other_tts.device = "cuda"
263
  else:
264
  other_tts.device = "cpu"
265
 
requirements.txt CHANGED
@@ -23,5 +23,4 @@ gradio
23
  spaces
24
  gruut
25
  txtsplit
26
- scipy
27
- VoPho
 
23
  spaces
24
  gruut
25
  txtsplit
26
+ scipy