File size: 996 Bytes
795d45e
 
 
 
251db9c
795d45e
 
 
251db9c
795d45e
 
 
 
251db9c
795d45e
 
251db9c
795d45e
 
251db9c
795d45e
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import gradio as gr
import torchaudio
from speechbrain.inference.vocoders import HIFIGAN
from speechbrain.tts import Tacotron2

# Load both models once, at module import: Tacotron2 produces the mel
# spectrogram from text and the HIFIGAN vocoder turns it into a waveform.
# NOTE(review): both savedir values are absolute paths rooted at "/" --
# confirm the process is allowed to create directories there.
tts_model = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="/tmpdir_tacotron2",
)
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="/tmpdir_hifigan",
)

# Function to generate speech
def generate_speech(text):
    """Synthesize speech for ``text`` in the form a ``gr.Audio`` output plays.

    Args:
        text: Sentence to speak, as entered in the input textbox.

    Returns:
        ``None`` when ``text`` is empty or only whitespace (there is nothing
        to play), otherwise a ``(sample_rate, samples)`` tuple where
        ``samples`` is a 1-D NumPy array of audio samples.
    """
    # Sampling rate of the LJSpeech checkpoints loaded at module level
    # (22.05 kHz per their SpeechBrain model cards).
    sample_rate = 22050

    # Nothing to synthesize: returning None leaves the audio output empty
    # instead of sending a blank string through the model.
    if not text or not text.strip():
        return None

    # Encode text using Tacotron2. encode_text returns the mel spectrogram
    # first, followed by further values (lengths, alignment) that are not
    # needed here; star-unpacking avoids a "too many values to unpack" error.
    mel_output, *_ = tts_model.encode_text(text)

    # Decode mel spectrogram to waveform using HIFIGAN vocoder
    waveform = hifi_gan.decode_batch(mel_output)

    # gr.Audio cannot play a torch tensor: it takes (sample_rate, ndarray).
    # Drop the singleton batch/channel axes to get a flat sample vector --
    # assumes a single utterance, i.e. shape (1, 1, time); TODO confirm.
    samples = waveform.squeeze().detach().cpu().numpy()
    return sample_rate, samples

# Build the Gradio UI: a single textbox feeds generate_speech and a single
# audio component presents whatever it returns.
text_box = gr.Textbox(
    label="Input Text",
    placeholder="Enter text to convert to speech...",
)
audio_player = gr.Audio(label="Output Speech")
iface = gr.Interface(fn=generate_speech, inputs=text_box, outputs=audio_player)

# Start serving the interface
iface.launch()