Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import torch
|
|
8 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
9 |
from datasets import load_dataset
|
10 |
import soundfile as sf
|
|
|
11 |
|
12 |
# Step 3: Load the models and the pronunciation dictionary
|
13 |
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
@@ -39,11 +40,12 @@ def text_to_speech(input_text):
|
|
39 |
# Generate speech using the model and vocoder
|
40 |
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
|
41 |
|
42 |
-
#
|
43 |
-
|
44 |
-
sf.write(
|
|
|
45 |
|
46 |
-
return
|
47 |
|
48 |
# Step 5: Create Gradio interface with examples
|
49 |
examples = [
|
|
|
8 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
9 |
from datasets import load_dataset
|
10 |
import soundfile as sf
|
11 |
+
import io
|
12 |
|
13 |
# Step 3: Load the models and the pronunciation dictionary
|
14 |
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
|
|
40 |
# Generate speech using the model and vocoder
|
41 |
speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
|
42 |
|
43 |
+
# Convert generated speech to an in-memory buffer
|
44 |
+
audio_buffer = io.BytesIO()
|
45 |
+
sf.write(audio_buffer, speech.numpy(), samplerate=16000, format='WAV')
|
46 |
+
audio_buffer.seek(0)
|
47 |
|
48 |
+
return audio_buffer
|
49 |
|
50 |
# Step 5: Create Gradio interface with examples
|
51 |
examples = [
|