miya3333 committed on
Commit 5c027d2 · verified · 1 Parent(s): e3f6ffa

Upload 2 files

Files changed (2)
  1. app.py +5 -16
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,9 +1,7 @@
  import gradio as gr
  import torch
- import soundfile as sf
  from speechbrain.inference.TTS import Tacotron2
  from speechbrain.inference.vocoders import HIFIGAN
- from speechbrain.utils.text_to_sequence import text_to_sequence
 
  # Load the models
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -11,25 +9,16 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
 
  # Define the inference function
  def synthesize_speech(text):
-     # Convert the text to a sequence of token IDs
-     sequence = text_to_sequence(
-         text,
-         tacotron2.hparams.text_cleaners,
-         add_bos_eos=tacotron2.hparams.add_bos_eos,
-         symbol_set=tacotron2.hparams.symbol_set
-     )
-     # Pad the sequence
-     batch = tacotron2.mods.encoder.pad_sequence_pre([torch.tensor(sequence)])
-
      # Generate a mel spectrogram with Tacotron2
-     mel_output, mel_length, alignment = tacotron2.encode_batch(batch)
+     # Pass the text in directly; encode_batch handles tokenization and LongTensor wrapping
+     mel_output, mel_length, alignment = tacotron2.encode_batch([text])
 
      # Generate audio from the mel spectrogram with HiFi-GAN
      waveforms = hifi_gan.decode_batch(mel_output)
 
-     # Save the audio as a .wav file
-     sf.write("speech.wav", waveforms.squeeze().cpu().numpy(), samplerate=hifi_gan.hparams.sample_rate)
-     return "speech.wav"
+     # Save the waveform tensor with torch.save
+     torch.save(waveforms, "speech.pt")
+     return "speech.pt"
 
  # Create the Gradio interface
  iface = gr.Interface(
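Note: torch.save writes a pickled tensor file (speech.pt), which a Gradio audio output would likely not be able to play directly. Below is a minimal sketch of a variant that writes a real WAV instead. It assumes torchaudio is installed (not in the commit's requirements), a savedir of "tmpdir_tts" for Tacotron2 (the commit truncates that argument), and the 22050 Hz sample rate of the LJSpeech models; it is an alternative sketch, not the committed code.

import torchaudio
from speechbrain.inference.TTS import Tacotron2
from speechbrain.inference.vocoders import HIFIGAN

# Same pretrained models as app.py; "tmpdir_tts" is an assumed savedir
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")

def synthesize_speech(text):
    # encode_batch accepts a list of raw strings and tokenizes them internally
    mel_output, mel_length, alignment = tacotron2.encode_batch([text])
    # decode_batch returns a waveform tensor of shape [batch, 1, time]
    waveforms = hifi_gan.decode_batch(mel_output)
    # Write mono audio; 22050 Hz is the assumed LJSpeech sample rate
    torchaudio.save("speech.wav", waveforms.squeeze(1).cpu(), 22050)
    return "speech.wav"

If the Gradio interface uses an audio output component, returning the .wav path keeps the rest of app.py unchanged.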
requirements.txt CHANGED
@@ -1,4 +1,3 @@
  gradio
- speechbrain==1.0.0
+ speechbrain
  torch
- soundfile
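If the torchaudio-based variant sketched above were adopted instead of torch.save, the dependency list would also need torchaudio; a hypothetical requirements.txt for that case:

gradio
speechbrain
torch
torchaudio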