Spaces:
Runtime error
Runtime error
Sandiago21
committed on
Commit
•
639d737
1
Parent(s):
6f905f8
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
@@ -14,12 +14,11 @@ asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-large-
|
|
14 |
model_id = "Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german" # update with your model id
|
15 |
# pipe = pipeline("automatic-speech-recognition", model=model_id)
|
16 |
model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
|
|
|
17 |
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
18 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
19 |
speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
|
20 |
|
21 |
-
processor = SpeechT5Processor.from_pretrained(model_id)
|
22 |
-
|
23 |
replacements = [
|
24 |
("Γ", "E"),
|
25 |
("Γ", "E"),
|
@@ -112,8 +111,8 @@ def speech_to_speech_translation(audio):
|
|
112 |
|
113 |
title = "Cascaded STST"
|
114 |
description = """
|
115 |
-
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in
|
116 |
-
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech, fine-tuned in
|
117 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
118 |
"""
|
119 |
|
|
|
14 |
model_id = "Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german" # update with your model id
|
15 |
# pipe = pipeline("automatic-speech-recognition", model=model_id)
|
16 |
model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
|
17 |
+
processor = SpeechT5Processor.from_pretrained(model_id)
|
18 |
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
|
19 |
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
|
20 |
speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
|
21 |
|
|
|
|
|
22 |
replacements = [
|
23 |
("Γ", "E"),
|
24 |
("Γ", "E"),
|
|
|
111 |
|
112 |
title = "Cascaded STST"
|
113 |
description = """
|
114 |
+
Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in German. Demo uses OpenAI's [Whisper Large v2](https://huggingface.co/openai/whisper-large-v2) model for speech translation, and [Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german](https://huggingface.co/Sandiago21/speecht5_finetuned_mozilla_foundation_common_voice_13_german) checkpoint for text-to-speech, which is based on Microsoft's
|
115 |
+
[SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech, fine-tuned in German Audio dataset:
|
116 |
![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
|
117 |
"""
|
118 |
|