Spaces:
Running
Running
File size: 2,958 Bytes
532dc11 4300fed 7ffbb2d 4300fed ed9d4e3 4300fed ed9d4e3 dcc7832 ed9d4e3 1272fd4 1241f18 532dc11 e04b055 406e977 994df9c 406e977 532dc11 4300fed 245eecf 4300fed 1eb399c 1241f18 b8401e1 532dc11 4300fed e04b055 70cbf96 881961f 532dc11 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# WebUI by mrfakename <X @realmrfakename / HF @mrfakename>
# Demo also available on HF Spaces: https://huggingface.co/spaces/mrfakename/MeloTTS
# Standard library
import io
import os
import subprocess
import sys
import tempfile

# Third-party
import gradio as gr
import nltk
import torch

# Fetch the UniDic dictionary before melo is imported.
# NOTE(review): the download is deliberately kept ahead of the melo import,
# as in the original statement order -- presumably melo's text frontend
# needs the dictionary at import time; confirm before reordering.
# sys.executable (rather than a bare "python" shell string) makes the
# download run under the interpreter/environment that is running this app,
# and no shell is involved.  check=False keeps it best effort: a failed
# download is reported but does not stop start-up.
if subprocess.run([sys.executable, '-m', 'unidic', 'download'], check=False).returncode != 0:
    print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")

from melo.api import TTS  # noqa: E402  (must follow the unidic download)

# NLTK tagger data.
# NOTE(review): presumably consumed by the English text pipeline -- confirm.
nltk.download('averaged_perceptron_tagger_eng')
# Module-level default rate.  Note that the UI section further down rebinds
# the name `speed` to the slider component.
speed = 1.0

# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# One TTS model per supported language code, all created eagerly at start-up
# and all placed on the same device.
models = {
    lang: TTS(language=lang, device=device)
    for lang in ('EN', 'ES', 'FR', 'ZH', 'JP', 'KR')
}

# Speaker-name -> id table of the English model; used to fill the speaker
# dropdown initially.
speaker_ids = models['EN'].hps.data.spk2id

# Sample sentence shown in the text box for each language.
default_text_dict = {
    'EN': 'The field of text-to-speech has seen rapid development recently.',
    'ES': 'El campo de la conversión de texto a voz ha experimentado un rápido desarrollo recientemente.',
    'FR': 'Le domaine de la synthèse vocale a connu un développement rapide récemment',
    'ZH': 'text-to-speech 领域近年来发展迅速',
    'JP': 'テキスト読み上げの分野は最近急速な発展を遂げています',
    'KR': '최근 텍스트 음성 변환 분야가 급속도로 발전하고 있습니다.',
}
def synthesize(text, speaker, speed, language, progress=gr.Progress()):
    """Synthesize ``text`` and return the resulting audio as bytes.

    The model is looked up in ``models`` by ``language``; ``speaker`` is
    translated to an id through that model's ``hps.data.spk2id`` table.
    The model's ``tts_to_file`` is asked to write into an in-memory buffer
    (with ``format='wav'``), and the buffer's contents are returned.

    ``speed`` is forwarded to the model unchanged.  ``progress`` is the
    Gradio progress tracker; its ``tqdm`` wrapper is passed on as ``pbar``.

    Raises ``KeyError`` if ``language`` is not in ``models`` or ``speaker``
    is not in the selected model's speaker table.
    """
    model = models[language]
    speaker_id = model.hps.data.spk2id[speaker]

    buffer = io.BytesIO()
    model.tts_to_file(
        text,
        speaker_id,
        buffer,
        speed=speed,
        pbar=progress.tqdm,
        format='wav',
    )
    return buffer.getvalue()
def load_speakers(language, text):
    """Refresh the speaker dropdown and sample text for ``language``.

    Returns a pair ``(dropdown_update, new_text)``:

    * ``dropdown_update`` is a ``gr.update`` whose choices are the speaker
      names of the model for ``language`` and whose value is the first of
      those names.
    * ``new_text`` is the sample sentence for ``language`` when ``text`` is
      currently one of the sample sentences; otherwise ``text`` is returned
      untouched so user-entered text is never overwritten.
    """
    showing_sample = text in default_text_dict.values()
    new_text = default_text_dict[language] if showing_sample else text

    speaker_names = list(models[language].hps.data.spk2id.keys())
    dropdown_update = gr.update(value=speaker_names[0], choices=speaker_names)
    return dropdown_update, new_text
# Build the demo page: input widgets, a synthesize button and an audio output.
# NOTE(review): the nesting below is reconstructed -- confirm that the four
# input widgets (and the language handler) are what belongs inside gr.Group.
with gr.Blocks() as demo:
    gr.Markdown('# MeloTTS Demo\n\nAn unofficial demo for [MeloTTS](https://github.com/myshell-ai/MeloTTS). **Make sure to try out several speakers, for example EN-Default!**')

    with gr.Group():
        speaker = gr.Dropdown(
            speaker_ids.keys(),
            interactive=True,
            value='EN-US',
            label='Speaker',
        )
        language = gr.Radio(
            ['EN', 'ES', 'FR', 'ZH', 'JP', 'KR'],
            label='Language',
            value='EN',
        )
        # This rebinds the module-level name `speed` to the slider component.
        speed = gr.Slider(
            label='Speed',
            minimum=0.1,
            maximum=10.0,
            value=1.0,
            interactive=True,
            step=0.1,
        )
        text = gr.Textbox(label="Text to speak", value=default_text_dict['EN'])

        # Changing the language swaps the speaker list and, when the text
        # box still holds a sample sentence, the sample text as well.
        language.input(
            load_speakers,
            inputs=[language, text],
            outputs=[speaker, text],
        )

    btn = gr.Button('Synthesize', variant='primary')
    aud = gr.Audio(interactive=False)
    btn.click(
        synthesize,
        inputs=[text, speaker, speed, language],
        outputs=[aud],
    )
    gr.Markdown('Demo by [mrfakename](https://twitter.com/realmrfakename).')

# Enable the request queue (open API, concurrency limit 10) and start the app.
demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)
|