Spaces:

miya3333
/

TTSDemo

Running

TTSDemo / app.py

Upload app.py

75d3ee0 verified 6 months ago

1.22 kB

	import gradio as gr
	import torch
	from speechbrain.inference.TTS import Tacotron2
	from speechbrain.inference.vocoders import HIFIGAN
	from scipy.io.wavfile import write

	# Load the models once at import time so every request reuses them.
	# Vocoder: built from the "speechbrain/tts-hifigan-ljspeech" source,
	# with savedir set to "tmpdir_vocoder".
	hifi_gan = HIFIGAN.from_hparams(
	    source="speechbrain/tts-hifigan-ljspeech",
	    savedir="tmpdir_vocoder",
	)
	# Text-to-mel model: built from the "speechbrain/tts-tacotron2-ljspeech"
	# source, with savedir set to "tmpdir_tts".
	tacotron2 = Tacotron2.from_hparams(
	    source="speechbrain/tts-tacotron2-ljspeech",
	    savedir="tmpdir_tts",
	)

	# Inference function
	def synthesize_speech(text):
	    """Synthesize speech for ``text`` and return the path to a WAV file.

	    The text is converted to a mel spectrogram with the module-level
	    ``tacotron2`` model, the spectrogram is converted to a waveform with
	    the module-level ``hifi_gan`` vocoder, and the waveform is written to
	    disk at a 22050 Hz sampling rate.

	    Args:
	        text: The text to synthesize.

	    Returns:
	        Path of the generated WAV file. Each call gets its own file.

	    Raises:
	        gr.Error: If ``text`` is empty or contains only whitespace.
	    """
	    # Local import keeps this block self-contained; the file's import
	    # header does not bring in tempfile.
	    import tempfile

	    # Reject empty input up front so the user sees a readable message
	    # instead of a failure from inside the model.
	    if not text or not text.strip():
	        raise gr.Error("Please enter some text to synthesize.")

	    sample_rate = 22050

	    # Generate the mel spectrogram with Tacotron2. encode_batch is given
	    # a list of raw strings, so the single input is wrapped in a
	    # one-element list. Only the spectrogram is used below.
	    mel_output, _mel_length, _alignment = tacotron2.encode_batch([text])

	    # Generate the waveform from the mel spectrogram with HiFi-GAN.
	    waveforms = hifi_gan.decode_batch(mel_output)

	    # squeeze() drops singleton dimensions before saving (presumably the
	    # batch/channel axes -- confirm against the vocoder's output shape).
	    samples = waveforms.squeeze().cpu().numpy()

	    # Write to a unique temporary file rather than a fixed "speech.wav":
	    # a shared file name lets concurrent requests overwrite each other's
	    # audio. delete=False keeps the file on disk after the handle closes
	    # so the returned path stays valid for the caller.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
	        wav_path = wav_file.name
	    write(wav_path, rate=sample_rate, data=samples)
	    return wav_path

	# Build the Gradio interface: a multi-line text box in, an audio player out.
	text_input = gr.Textbox(lines=5, label="Input Text")
	# type="filepath" matches synthesize_speech, which returns a file path.
	audio_output = gr.Audio(label="Output Audio", type="filepath")
	iface = gr.Interface(
	    fn=synthesize_speech,
	    inputs=text_input,
	    outputs=audio_output,
	    title="TTS Demo",
	    description="Enter text to synthesize speech.",
	)

	# Start serving the demo.
	iface.launch()