Jaward committed on
Commit
81e33eb
·
verified ·
1 Parent(s): 8da7d41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -19
app.py CHANGED
@@ -9,7 +9,7 @@ from streaming_stt_nemo import Model
9
  import torch
10
  import random
11
  from openai import OpenAI
12
- from transformers import AutoProcessor, SeamlessM4TModel
13
 
14
  default_lang = "en"
15
 
@@ -91,22 +91,41 @@ async def respond(audio, model, seed):
91
  await communicate.save(tmp_path)
92
  yield tmp_path
93
 
94
- # Load the Seamless Streaming model and processor
95
- processor = AutoProcessor.from_pretrained("facebook/seamless-streaming")
96
- seamless_model = SeamlessM4TModel.from_pretrained("facebook/seamless-streaming")
 
 
 
 
 
 
97
 
98
- def translate_speech(audio, target_lang):
99
- if audio is None:
100
- return ""
101
- audio_array, sample_rate = audio
102
- inputs = processor(audios=audio_array, return_tensors="pt", sampling_rate=sample_rate)
 
103
 
104
- with torch.no_grad():
105
- generated_speech = seamless_model.generate(**inputs, tgt_lang=target_lang)
 
 
 
 
 
 
 
106
 
107
- translated_text = processor.batch_decode(generated_speech, skip_special_tokens=True)[0]
108
-
109
- return translated_text
 
 
 
 
 
110
 
111
  DESCRIPTION = """ # <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>"""
112
 
@@ -146,18 +165,21 @@ with gr.Blocks(css="style.css") as demo:
146
  )
147
 
148
  with gr.TabItem("Speech Translation"):
149
- input_audio = gr.Audio(label="Input Speech", sources="microphone", type="numpy", streaming=True)
150
  target_lang = gr.Dropdown(
151
- choices=["spa", "fra", "deu", "ita", "jpn", "kor", "cmn"],
152
- value="spa",
153
  label="Target Language"
154
  )
155
- output_text = gr.Textbox(label="Translated Text")
 
 
 
156
 
157
  gr.Interface(
158
  fn=translate_speech,
159
  inputs=[input_audio, target_lang],
160
- outputs=[output_text],
161
  live=True
162
  )
163
 
 
9
  import torch
10
  import random
11
  from openai import OpenAI
12
+ import subprocess
13
 
14
  default_lang = "en"
15
 
 
91
  await communicate.save(tmp_path)
92
  yield tmp_path
93
 
94
# Languages supported for seamless-expressive translation: display names
# (offered in the "Target Language" dropdown) mapped to the language codes
# passed to expressivity_predict via --tgt_lang.
LANGUAGE_CODES = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
    "Italian": "ita",
    "Chinese": "cmn",
}


def translate_speech(audio_file, target_language):
    """Translate input speech (audio file) to the specified target language.

    Runs the ``expressivity_predict`` command-line tool on ``audio_file`` and
    returns the path of the audio file the tool is asked to write.

    Args:
        audio_file: Path to the recorded input audio, or ``None`` when no
            recording is available yet.
        target_language: A key of ``LANGUAGE_CODES`` (for example
            ``"Spanish"``).

    Returns:
        Path of the translated ``.wav`` file, or ``None`` when ``audio_file``
        is ``None``.

    Raises:
        KeyError: If ``target_language`` is not a key of ``LANGUAGE_CODES``.
        subprocess.CalledProcessError: If ``expressivity_predict`` exits with
            a non-zero status.
    """
    # Function-scope imports so this block does not depend on which modules
    # the top of the file happens to import.
    import os
    import tempfile

    # With no recording there is nothing to translate; a None entry in the
    # argument list would make subprocess.run raise TypeError.
    if audio_file is None:
        return None

    language_code = LANGUAGE_CODES[target_language]

    # Write into a fresh directory per call: a single fixed output path would
    # let concurrent requests overwrite each other's translated audio.
    # NOTE(review): these directories are not cleaned up here.
    output_dir = tempfile.mkdtemp(prefix="translated_audio_")
    output_file = os.path.join(output_dir, "translated_audio.wav")

    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "seamlessmodel",
        "--output_path", output_file,
    ]

    # Argument list (no shell), so the file path is never shell-interpreted.
    subprocess.run(command, check=True)

    if os.path.exists(output_file):
        print(f"File created successfully: {output_file}")
    else:
        print(f"File not found: {output_file}")

    return output_file
129
 
130
  DESCRIPTION = """ # <center><b>Hello, I am Optimus Prime your personal AI voice assistant</b></center>"""
131
 
 
165
  )
166
 
167
  with gr.TabItem("Speech Translation"):
168
+ input_audio = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
169
  target_lang = gr.Dropdown(
170
+ choices=list(LANGUAGE_CODES.keys()),
171
+ value="Spanish",
172
  label="Target Language"
173
  )
174
+ output_audio = gr.Audio(label="Translated Audio",
175
+ interactive=False,
176
+ autoplay=True,
177
+ elem_classes="audio")
178
 
179
  gr.Interface(
180
  fn=translate_speech,
181
  inputs=[input_audio, target_lang],
182
+ outputs=[output_audio],
183
  live=True
184
  )
185