Spaces:
Running
Running
add azure tts
Browse files
- app.py +36 -25
- packages.txt +5 -1
- requirements.txt +2 -1
app.py
CHANGED
@@ -41,29 +41,28 @@ os.system("wget https://lf3-nlp-opensource.bytetos.com/obj/nlp-opensource/emnlp2
|
|
41 |
# sed -i 's/package_data={/# package_data={/g' setup.py; \
|
42 |
# pip install ./; cd ..")
|
43 |
|
44 |
-
os.system("git clone https://github.com/DigitalPhonetics/IMS-Toucan.git")
|
45 |
-
sys.path.append('./IMS-Toucan')
|
46 |
-
os.system("cd IMS-Toucan; pip install --no-cache-dir -r requirements.txt")
|
47 |
-
os.system("python run_model_downloader.py; cd ..")
|
48 |
|
49 |
-
from InferenceInterfaces.PortaSpeechInterface import PortaSpeechInterface
|
50 |
-
cwd = os.getcwd()
|
51 |
-
os.chdir('./IMS-Toucan')
|
52 |
-
tts = PortaSpeechInterface(device='cpu', tts_model_path='Meta')
|
53 |
-
os.chdir(cwd)
|
54 |
|
55 |
# azure tts
|
56 |
-
os.system("
|
57 |
-
os.system("
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
os.system("spx config @region --set eastus")
|
67 |
|
68 |
|
69 |
# The predict function. audio, language and mic_audio are all parameters directly passed by gradio
|
@@ -137,11 +136,23 @@ def predict(audio, src_language, tgt_language, mic_audio=None):
|
|
137 |
translation = (' '.join(r.readline().split(' ')[3:])).strip()
|
138 |
|
139 |
# tts
|
140 |
-
tts.set_language(tgt_language)
|
141 |
-
tts.read_to_file(text_list=[translation], file_location='output.wav')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
142 |
|
143 |
# Returns the text
|
144 |
-
return transcript, translation,
|
145 |
|
146 |
|
147 |
|
@@ -165,8 +176,8 @@ gr.Interface(
|
|
165 |
'Russian',
|
166 |
'French',
|
167 |
'Detect Language'], type="value", default='English', label="Select the language of input"),
|
168 |
-
gr.inputs.Dropdown([
|
169 |
-
|
170 |
'English',
|
171 |
'Spanish',
|
172 |
'Russian',
|
|
|
41 |
# sed -i 's/package_data={/# package_data={/g' setup.py; \
|
42 |
# pip install ./; cd ..")
|
43 |
|
44 |
+
# os.system("git clone https://github.com/DigitalPhonetics/IMS-Toucan.git")
|
45 |
+
# sys.path.append('./IMS-Toucan')
|
46 |
+
# os.system("cd IMS-Toucan; pip install --no-cache-dir -r requirements.txt")
|
47 |
+
# os.system("python run_model_downloader.py; cd ..")
|
48 |
|
49 |
+
# from InferenceInterfaces.PortaSpeechInterface import PortaSpeechInterface
|
50 |
+
# cwd = os.getcwd()
|
51 |
+
# os.chdir('./IMS-Toucan')
|
52 |
+
# tts = PortaSpeechInterface(device='cpu', tts_model_path='Meta')
|
53 |
+
# os.chdir(cwd)
|
54 |
|
55 |
# azure tts
|
56 |
+
# os.system("2d1847f4151f4f94ae06d0b620533936")
|
57 |
+
# os.system("eastus")
|
58 |
+
lang2voice = {
|
59 |
+
"zh": "zh-CN-XiaoxiaoNeural",
|
60 |
+
"ar": "ar-EG-SalmaNeural",
|
61 |
+
"en": "en-US-JennyNeural",
|
62 |
+
"es": "es-ES-AbrilNeural",
|
63 |
+
"ru": "ru-RU-DariyaNeural",
|
64 |
+
"fr": "fr-FR-AlainNeural",
|
65 |
+
}
|
|
|
66 |
|
67 |
|
68 |
# The predict function. audio, language and mic_audio are all parameters directly passed by gradio
|
|
|
136 |
translation = (' '.join(r.readline().split(' ')[3:])).strip()
|
137 |
|
138 |
# tts
|
139 |
+
# tts.set_language(tgt_language)
|
140 |
+
# tts.read_to_file(text_list=[translation], file_location='output.wav')
|
141 |
+
|
142 |
+
# azure tts
|
143 |
+
import azure.cognitiveservices.speech as speechsdk
|
144 |
+
speech_key = "2d1847f4151f4f94ae06d0b620533936"
|
145 |
+
service_region = "eastus"
|
146 |
+
|
147 |
+
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
|
148 |
+
# Note: the voice setting will not overwrite the voice element in input SSML.
|
149 |
+
speech_config.speech_synthesis_voice_name = lang2voice[tgt_language]
|
150 |
+
audio_config = speechsdk.audio.AudioOutputConfig(filename="output.wav")
|
151 |
+
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
|
152 |
+
speech_synthesizer.speak_text(translation)
|
153 |
|
154 |
# Returns the text
|
155 |
+
return transcript, translation, "output.wav"
|
156 |
|
157 |
|
158 |
|
|
|
176 |
'Russian',
|
177 |
'French',
|
178 |
'Detect Language'], type="value", default='English', label="Select the language of input"),
|
179 |
+
gr.inputs.Dropdown(['Arabic',
|
180 |
+
'Chinese',
|
181 |
'English',
|
182 |
'Spanish',
|
183 |
'Russian',
|
packages.txt
CHANGED
@@ -1,3 +1,7 @@
|
|
1 |
espeak
|
2 |
libasound-dev
|
3 |
-
libportaudio2
|
|
|
|
|
|
|
|
|
|
1 |
espeak
|
2 |
libasound-dev
|
3 |
+
libportaudio2
|
4 |
+
build-essential
|
5 |
+
libssl-dev
|
6 |
+
libasound2
|
7 |
+
wget
|
requirements.txt
CHANGED
@@ -8,4 +8,5 @@ kytea
|
|
8 |
six
|
9 |
phonemizer
|
10 |
sentencepiece
|
11 |
-
hanziconv
|
|
|
|
8 |
six
|
9 |
phonemizer
|
10 |
sentencepiece
|
11 |
+
hanziconv
|
12 |
+
azure-cognitiveservices-speech
|