# Third-party dependencies used by this script.
import gradio as gr
import librosa
import numpy as np
import torch
from transformers import (
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
)

# NOTE: earlier implementation built on the `TTS` package. It is disabled and
# kept here for reference only; the SpeechT5 setup below is what actually runs.
#
# from TTS.api import TTS
# tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=False)
#
# def predict(text):
#     file_path = "output.wav"
#     tts.tts_to_file(text, speaker=tts.speakers[0], language="en", file_path=file_path)
#     return file_path
#
# demo = gr.Interface(
#     fn=predict,
#     inputs='text',
#     outputs='audio'
# )
# demo.launch()

# Hub identifier shared by the processor and the text-to-speech model so the
# two are always loaded from the same checkpoint.
checkpoint = "microsoft/speecht5_tts"

# These objects are created once, at import time. `from_pretrained` resolves
# the identifier to pretrained weights/config (presumably downloading them on
# first use and caching afterwards -- see the transformers documentation).
processor = SpeechT5Processor.from_pretrained(checkpoint)
model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)

# The HiFi-GAN vocoder lives in its own checkpoint, separate from the TTS model.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")