Vinay15 committed
Commit 64adb3b · verified · 1 Parent(s): f2006ef

Update app.py

Files changed (1):
  1. app.py +6 -4
app.py CHANGED
@@ -8,6 +8,7 @@ import torch
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from datasets import load_dataset
 import soundfile as sf
+import io
 
 # Step 3: Load the models and the pronunciation dictionary
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
@@ -39,11 +40,12 @@ def text_to_speech(input_text):
     # Generate speech using the model and vocoder
     speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
 
-    # Save the generated speech as a .wav file
-    output_file = "speech_output.wav"
-    sf.write(output_file, speech.numpy(), samplerate=16000)
+    # Convert generated speech to an in-memory buffer
+    audio_buffer = io.BytesIO()
+    sf.write(audio_buffer, speech.numpy(), samplerate=16000, format='WAV')
+    audio_buffer.seek(0)
 
-    return output_file
+    return audio_buffer
 
 # Step 5: Create Gradio interface with examples
 examples = [
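
The change above writes the WAV data into an io.BytesIO buffer instead of saving speech_output.wav to disk on every call. A minimal standalone sketch of that pattern, with a silent NumPy array standing in for the model output (the samples variable and the round-trip check are illustrative, not part of app.py):

import io

import numpy as np
import soundfile as sf

# Illustrative stand-in for speech.numpy(): one second of silence at 16 kHz
samples = np.zeros(16000, dtype=np.float32)

# Write the samples into an in-memory WAV container; format='WAV' is needed
# because a BytesIO object has no filename extension for soundfile to inspect.
audio_buffer = io.BytesIO()
sf.write(audio_buffer, samples, samplerate=16000, format='WAV')
audio_buffer.seek(0)  # rewind so the consumer reads from the start

# Round-trip check: read the buffer back and confirm rate and length survived
data, rate = sf.read(audio_buffer)
assert rate == 16000 and len(data) == 16000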