Spaces:
Build error
Build error
Troubleshoot file writing problem.
Browse files
Work with Coqui synthesis in memory so that the file is never written. Switch espeak-ng to synthesize on the command line with the subprocess package.
- src/synthesize.py +12 -13
src/synthesize.py
CHANGED
@@ -8,6 +8,7 @@ import subprocess
|
|
8 |
from scipy.io import wavfile
|
9 |
from transformers import pipeline
|
10 |
import os
|
|
|
11 |
|
12 |
def synth_mms(text:str, model:str):
|
13 |
'''
|
@@ -42,7 +43,9 @@ def synth_coqui(text:str, model:str):
|
|
42 |
text: Text to synthesze
|
43 |
model: Model code
|
44 |
Returns:
|
45 |
-
Streaming Wav and sampling rate.
|
|
|
|
|
46 |
'''
|
47 |
if model is not None:
|
48 |
# Get device
|
@@ -50,14 +53,11 @@ def synth_coqui(text:str, model:str):
|
|
50 |
|
51 |
# Init TTS
|
52 |
tts = TTS(model, progress_bar=False).to(device)
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
sampling_rate, wav = wavfile.read('test.wav')
|
57 |
-
os.remove("test.wav")
|
58 |
|
59 |
-
|
60 |
-
return wav, sampling_rate
|
61 |
else:
|
62 |
return None
|
63 |
|
@@ -74,12 +74,11 @@ def synth_espeakng(text:str, model:str):
|
|
74 |
'''
|
75 |
if model is not None:
|
76 |
|
77 |
-
|
78 |
-
esng = espeakng.Speaker()
|
79 |
-
esng.voice = model
|
80 |
-
esng.say(text, export_path="test.wav")
|
81 |
|
82 |
-
print(os.listdir())
|
83 |
sampling_rate, wav = wavfile.read('test.wav')
|
84 |
os.remove("test.wav")
|
85 |
|
|
|
8 |
from scipy.io import wavfile
|
9 |
from transformers import pipeline
|
10 |
import os
|
11 |
+
import numpy as np
|
12 |
|
13 |
def synth_mms(text:str, model:str):
|
14 |
'''
|
|
|
43 |
text: Text to synthesze
|
44 |
model: Model code
|
45 |
Returns:
|
46 |
+
Streaming Wav and sampling rate.
|
47 |
+
|
48 |
+
IMPORTANT: Current implementation assumes 22050 sampling rate, this should be verified when adding a new model.
|
49 |
'''
|
50 |
if model is not None:
|
51 |
# Get device
|
|
|
53 |
|
54 |
# Init TTS
|
55 |
tts = TTS(model, progress_bar=False).to(device)
|
56 |
+
|
57 |
+
# Infer
|
58 |
+
wav = tts.tts(text=text) # is_multi_speaker=False
|
|
|
|
|
59 |
|
60 |
+
return np.array(wav), 22050
|
|
|
61 |
else:
|
62 |
return None
|
63 |
|
|
|
74 |
'''
|
75 |
if model is not None:
|
76 |
|
77 |
+
subprocess.run(['espeak-ng', f'-v{model}', "-w test.wav", text])
|
78 |
+
#esng = espeakng.Speaker()
|
79 |
+
#esng.voice = model
|
80 |
+
#esng.say(text, export_path="test.wav")
|
81 |
|
|
|
82 |
sampling_rate, wav = wavfile.read('test.wav')
|
83 |
os.remove("test.wav")
|
84 |
|