speech-to-speech-translation

Runtime error

sjdata commited on Jul 17, 2023

Commit

85b796a

1 Parent(s): e683f3f

Added API calls to trained voice

Files changed (7) hide show

.idea/.gitignore ADDED Viewed

+# Default ignored files
+/shelf/
+/workspace.xml

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (speech-to-speech-translation)" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/speech-to-speech-translation.iml" filepath="$PROJECT_DIR$/.idea/speech-to-speech-translation.iml" />
+    </modules>
+  </component>
+</project>

.idea/speech-to-speech-translation.iml ADDED Viewed

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$">
+      <excludeFolder url="file://$MODULE_DIR$/venv" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/vcs.xml ADDED Viewed

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

app.py CHANGED Viewed

@@ -14,15 +14,15 @@ asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base",
 # load text-to-speech checkpoint and speaker embeddings
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
-model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts").to(device)
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
-embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
-speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
 def translate(audio):
-    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "translate"})
     return outputs["text"]

 # load text-to-speech checkpoint and speaker embeddings
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+model = SpeechT5ForTextToSpeech.from_pretrained("sjdata/speecht5_finetuned_single_speaker_de_small_librivox").to(device)
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan").to(device)
+embeddings_dataset = load_dataset("sjdata/test-embeddings-dataset", split="train")
+speaker_embeddings = torch.tensor(embeddings_dataset[9]["speaker_embeddings"]).unsqueeze(0)
 def translate(audio):
+    outputs = asr_pipe(audio, max_new_tokens=256, generate_kwargs={"task": "transcribe", "language": "de"})
     return outputs["text"]