miya3333 committed on
Commit
978608b
·
verified ·
1 Parent(s): dc4f75a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -3
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import torch
3
- from speechbrain.pretrained import Tacotron2
4
- from speechbrain.pretrained import HIFIGAN
5
 
6
  # モデルのロード
7
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -9,8 +9,14 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
9
 
10
  # 推論関数の定義
11
  def synthesize_speech(text):
 
 
 
 
 
 
12
  # Tacotron2でmel spectrogramを生成
13
- mel_output, _, _ = tacotron2.encode_text(text)
14
 
15
  # HiFi-GANでmel spectrogramから音声を生成
16
  waveforms = hifi_gan.decode_batch(mel_output)
 
1
  import gradio as gr
2
  import torch
3
+ from speechbrain.inference.TTS import Tacotron2
4
+ from speechbrain.inference.vocoders import HIFIGAN
5
 
6
  # モデルのロード
7
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
 
9
 
10
  # 推論関数の定義
11
  def synthesize_speech(text):
12
+ # テキストをトークンIDに変換
13
+ tokens = tacotron2.tokenize(text)
14
+
15
+ # トークンIDをLong型のテンソルに変換
16
+ tokens = torch.LongTensor(tokens)
17
+
18
  # Tacotron2でmel spectrogramを生成
19
+ mel_output, mel_length, alignment = tacotron2.encode_batch(tokens)
20
 
21
  # HiFi-GANでmel spectrogramから音声を生成
22
  waveforms = hifi_gan.decode_batch(mel_output)