# NOTE: "Last commit not found" - hosting-page residue captured with this file; not part of the program.
import tempfile

import gradio as gr
import torch
import torchaudio
from transformers import (
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    SpeechT5Processor,
)
# Load the processor and the text-to-speech model once at start-up so that
# every request reuses the same instances.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")

# Hub URLs containing "/blob/" return the HTML file viewer; "/resolve/" serves
# the raw file contents, which is what has to be downloaded.
# NOTE(review): confirm that this file actually exists in the repository. The
# SpeechT5 documentation obtains speaker x-vectors from a separate dataset
# instead - switch sources if this download returns 404.
SPEAKER_EMBEDDING_URL = (
    "https://huggingface.co/microsoft/speecht5_tts/resolve/main/"
    "speaker_embeddings/english/vctk_speaker_0.pt"
)


def _load_speaker_embedding(url):
    """Download a ``.pt`` speaker embedding and return it as a 2-D float tensor.

    Args:
        url: Direct-download URL of a tensor saved with ``torch.save``.

    Returns:
        A ``torch.float32`` tensor with a leading batch dimension.

    Raises:
        TypeError: If the downloaded file does not contain a tensor.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = f"{tmp_dir}/speaker_embedding.pt"
        torch.hub.download_url_to_file(url, local_path, progress=False)
        # The file comes from the network: weights_only=True restricts
        # unpickling to tensors/primitive containers instead of arbitrary code.
        embedding = torch.load(local_path, map_location="cpu", weights_only=True)

    if not isinstance(embedding, torch.Tensor):
        raise TypeError(
            f"Expected a tensor in {url!r}, got {type(embedding).__name__}"
        )

    embedding = embedding.to(torch.float32)
    # A bare vector gets a batch dimension in front.
    # NOTE(review): presumably generate_speech wants (batch, embedding_dim) - confirm.
    if embedding.dim() == 1:
        embedding = embedding.unsqueeze(0)
    return embedding


# Load a voice embedding (necessary for the SpeechT5 model)
speaker_embedding = _load_speaker_embedding(SPEAKER_EMBEDDING_URL)
# generate_speech() only returns a waveform when it is given a vocoder; without
# one it returns a mel spectrogram, which is not playable audio.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")


def text_to_speech(text):
    """Synthesize *text* and return the path of a WAV file with the speech.

    Args:
        text: The text to speak.

    Returns:
        Path of a temporary ``.wav`` file. The file is created with
        ``delete=False`` so it survives until the caller has read it.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert to speech.")

    # The keyword form is the documented way to pass text to this processor.
    inputs = processor(text=text, return_tensors="pt")
    speech = model.generate_speech(
        inputs["input_ids"], speaker_embedding, vocoder=vocoder
    )

    # The vocoder output is a 1-D sample vector; torchaudio.save expects a
    # 2-D (channels, samples) tensor, so add a single-channel axis.
    waveform = speech.detach().cpu()
    if waveform.dim() == 1:
        waveform = waveform.unsqueeze(0)

    # Reserve a file name, then close the handle before writing: some
    # platforms refuse to open a file that is still held open elsewhere.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        wav_path = f.name
    torchaudio.save(wav_path, waveform, 16000)
    return wav_path
# Web UI: one text box in, one audio player out, wired to text_to_speech.
_interface_options = {
    "fn": text_to_speech,
    "inputs": "text",
    "outputs": "audio",
    "title": "Text to Speech",
    "description": "Convert text to speech using the microsoft/speecht5_tts model",
}
interface = gr.Interface(**_interface_options)

# Start serving the app.
interface.launch()