speech-to-speech-translation

Sleeping

sfedar committed on Sep 13, 2024

Commit

6b64d77

verified ·

1 Parent(s): 799bc28

Add a separate pipeline for translation (Helsinki-NLP/opus-mt model)

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,8 +8,9 @@ from transformers import SpeechT5ForTextToSpeech, SpeechT5HifiGan, SpeechT5Proce
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
-# load speech translation checkpoint
 asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
 # load text-to-speech checkpoint and speaker embeddings
 tts_model_name = "sanchit-gandhi/speecht5_tts_vox_nl"
@@ -22,8 +23,9 @@ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze
 def translate(audio):
-    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate", "language": "dutch"})
-    return outputs["text"]
 def synthesise(text):

 device = "cuda:0" if torch.cuda.is_available() else "cpu"
+# load speech translation checkpoints
 asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=device)
+translation_pipeline = pipeline("translation", model="Helsinki-NLP/opus-mt-en-nl", device=device)
 # load text-to-speech checkpoint and speaker embeddings
 tts_model_name = "sanchit-gandhi/speecht5_tts_vox_nl"
 def translate(audio):
+    transcripts = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})["text"]
+    outputs = translation_pipeline(transcripts)
+    return outputs[0]['translation_text']
 def synthesise(text):