Spaces:

miya3333
/

TTSDemo

Running

miya3333 committed on Jan 3

Commit

978608b

verified ·

1 Parent(s): dc4f75a

Upload app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 import torch
-from speechbrain.pretrained import Tacotron2
-from speechbrain.pretrained import HIFIGAN
 # モデルのロード
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -9,8 +9,14 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
 # 推論関数の定義
 def synthesize_speech(text):
     # Tacotron2でmel spectrogramを生成
-    mel_output, _, _ = tacotron2.encode_text(text)
     # HiFi-GANでmel spectrogramから音声を生成
     waveforms = hifi_gan.decode_batch(mel_output)

 import gradio as gr
 import torch
+from speechbrain.inference.TTS import Tacotron2
+from speechbrain.inference.vocoders import HIFIGAN
 # モデルのロード
 hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
 # 推論関数の定義
 def synthesize_speech(text):
+    # テキストをトークンIDに変換
+    tokens = tacotron2.tokenize(text)
+    # トークンIDをLong型のテンソルに変換
+    tokens = torch.LongTensor(tokens)
     # Tacotron2でmel spectrogramを生成
+    mel_output, mel_length, alignment = tacotron2.encode_batch(tokens)
     # HiFi-GANでmel spectrogramから音声を生成
     waveforms = hifi_gan.decode_batch(mel_output)