Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ from streaming_stt_nemo import Model
|
|
9 |
import torch
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
-
|
13 |
|
14 |
default_lang = "en"
|
15 |
|
@@ -91,22 +91,41 @@ async def respond(audio, model, seed):
|
|
91 |
await communicate.save(tmp_path)
|
92 |
yield tmp_path
|
93 |
|
94 |
-
#
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
-
def translate_speech(
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
|
|
103 |
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
DESCRIPTION = """ # <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>"""
|
112 |
|
@@ -146,18 +165,21 @@ with gr.Blocks(css="style.css") as demo:
|
|
146 |
)
|
147 |
|
148 |
with gr.TabItem("Speech Translation"):
|
149 |
-
input_audio = gr.Audio(label="
|
150 |
target_lang = gr.Dropdown(
|
151 |
-
choices=
|
152 |
-
value="
|
153 |
label="Target Language"
|
154 |
)
|
155 |
-
|
|
|
|
|
|
|
156 |
|
157 |
gr.Interface(
|
158 |
fn=translate_speech,
|
159 |
inputs=[input_audio, target_lang],
|
160 |
-
outputs=[
|
161 |
live=True
|
162 |
)
|
163 |
|
|
|
9 |
import torch
|
10 |
import random
|
11 |
from openai import OpenAI
|
12 |
+
import subprocess
|
13 |
|
14 |
default_lang = "en"
|
15 |
|
|
|
91 |
await communicate.save(tmp_path)
|
92 |
yield tmp_path
|
93 |
|
94 |
+
# Display name shown in the UI -> language code passed to ``--tgt_lang``
# of the seamless-expressive command-line tool.
LANGUAGE_CODES = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
    "Italian": "ita",
    "Chinese": "cmn",
}


def translate_speech(audio_file, target_language):
    """Translate input speech (audio file) to the specified target language.

    Runs the ``expressivity_predict`` command-line tool on ``audio_file``
    and returns the path of the WAV file it was asked to write.

    Args:
        audio_file: Path of the recorded audio. May be ``None`` or empty
            when the callback fires without a recording.
        target_language: A key of ``LANGUAGE_CODES`` (e.g. ``"Spanish"``).

    Returns:
        Path of the translated audio file, or ``None`` when no input audio
        was supplied.

    Raises:
        KeyError: ``target_language`` is not a key of ``LANGUAGE_CODES``.
        subprocess.CalledProcessError: the tool exited with a non-zero status.
        FileNotFoundError: the tool succeeded but wrote no output file.
    """
    # Imported locally so this function does not depend on the module's
    # import list for names it introduces.
    import os
    import tempfile

    # Nothing to translate: passing None into the argv list below would make
    # subprocess.run raise a TypeError.
    if not audio_file:
        return None

    language_code = LANGUAGE_CODES[target_language]

    # Write each result into its own fresh directory so concurrent requests
    # cannot overwrite one another's output. The file itself is not
    # pre-created, which keeps the existence check below meaningful.
    output_dir = tempfile.mkdtemp(prefix="translated_audio_")
    output_file = os.path.join(output_dir, "translated_audio.wav")

    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "seamlessmodel",
        "--output_path", output_file,
    ]

    # List form (no shell) keeps the user-supplied path from being
    # interpreted by a shell; check=True surfaces tool failures.
    subprocess.run(command, check=True)

    if not os.path.exists(output_file):
        print(f"File not found: {output_file}")
        raise FileNotFoundError(
            f"expressivity_predict did not produce {output_file}"
        )

    print(f"File created successfully: {output_file}")
    return output_file
|
129 |
|
130 |
DESCRIPTION = """ # <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>"""
|
131 |
|
|
|
165 |
)
|
166 |
|
167 |
with gr.TabItem("Speech Translation"):
|
168 |
+
input_audio = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
|
169 |
target_lang = gr.Dropdown(
|
170 |
+
choices=list(LANGUAGE_CODES.keys()),
|
171 |
+
value="Spanish",
|
172 |
label="Target Language"
|
173 |
)
|
174 |
+
output_audio = gr.Audio(label="Translated Audio",
|
175 |
+
interactive=False,
|
176 |
+
autoplay=True,
|
177 |
+
elem_classes="audio")
|
178 |
|
179 |
gr.Interface(
|
180 |
fn=translate_speech,
|
181 |
inputs=[input_audio, target_lang],
|
182 |
+
outputs=[output_audio],
|
183 |
live=True
|
184 |
)
|
185 |
|