Spaces:

Vinay15
/

Text-to-Speech_Model_for_English_Technical_Speech

Sleeping

Vinay15 committed on Oct 24, 2024

Commit

64adb3b

verified ·

1 Parent(s): f2006ef

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,6 +8,7 @@ import torch
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from datasets import load_dataset
 import soundfile as sf
 # Step 3: Load the models and the pronunciation dictionary
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
@@ -39,11 +40,12 @@ def text_to_speech(input_text):
     # Generate speech using the model and vocoder
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
-    # Save the generated speech as a .wav file
-    output_file = "speech_output.wav"
-    sf.write(output_file, speech.numpy(), samplerate=16000)
-    return output_file
 # Step 5: Create Gradio interface with examples
 examples = [

 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from datasets import load_dataset
 import soundfile as sf
+import io
 # Step 3: Load the models and the pronunciation dictionary
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
     # Generate speech using the model and vocoder
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
+    # Convert generated speech to an in-memory buffer
+    audio_buffer = io.BytesIO()
+    sf.write(audio_buffer, speech.numpy(), samplerate=16000, format='WAV')
+    audio_buffer.seek(0)
+    return audio_buffer
 # Step 5: Create Gradio interface with examples
 examples = [