miya3333 committed on
Commit 5c027d2 · verified · 1 Parent(s): e3f6ffa

Upload 2 files

Files changed (2)
  1. app.py +5 -16
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,9 +1,7 @@
  import gradio as gr
  import torch
- import soundfile as sf
  from speechbrain.inference.TTS import Tacotron2
  from speechbrain.inference.vocoders import HIFIGAN
- from speechbrain.utils.text_to_sequence import text_to_sequence
 
  # Load the models
  hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
@@ -11,25 +9,16 @@ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech",
 
  # Define the inference function
  def synthesize_speech(text):
-     # Convert the text to a sequence of token IDs
-     sequence = text_to_sequence(
-         text,
-         tacotron2.hparams.text_cleaners,
-         add_bos_eos=tacotron2.hparams.add_bos_eos,
-         symbol_set=tacotron2.hparams.symbol_set
-     )
-     # Pad the sequence
-     batch = tacotron2.mods.encoder.pad_sequence_pre([torch.tensor(sequence)])
-
      # Generate a mel spectrogram with Tacotron2
-     mel_output, mel_length, alignment = tacotron2.encode_batch(batch)
+     # Pass the text in directly; encode_batch handles tokenization and LongTensor wrapping
+     mel_output, mel_length, alignment = tacotron2.encode_batch([text])
 
      # Generate audio from the mel spectrogram with HiFi-GAN
      waveforms = hifi_gan.decode_batch(mel_output)
 
-     # Save the audio as a .wav file
-     sf.write("speech.wav", waveforms.squeeze().cpu().numpy(), samplerate=hifi_gan.hparams.sample_rate)
-     return "speech.wav"
+     # Save the waveform tensor with torch.save
+     torch.save(waveforms, "speech.pt")
+     return "speech.pt"
 
  # Create the Gradio interface
  iface = gr.Interface(
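Note: torch.save writes a pickled tensor file (speech.pt), which a Gradio audio output would likely not be able to play directly. Below is a minimal sketch of a variant that writes a real WAV instead. It assumes torchaudio is installed (not in the commit's requirements), a savedir of "tmpdir_tts" for Tacotron2 (the commit truncates that argument), and the 22050 Hz sample rate of the LJSpeech models; it is an alternative sketch, not the committed code.

import torchaudio
from speechbrain.inference.TTS import Tacotron2
from speechbrain.inference.vocoders import HIFIGAN

# Same pretrained models as app.py; "tmpdir_tts" is an assumed savedir
hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")

def synthesize_speech(text):
    # encode_batch accepts a list of raw strings and tokenizes them internally
    mel_output, mel_length, alignment = tacotron2.encode_batch([text])
    # decode_batch returns a waveform tensor of shape [batch, 1, time]
    waveforms = hifi_gan.decode_batch(mel_output)
    # Write mono audio; 22050 Hz is the assumed LJSpeech sample rate
    torchaudio.save("speech.wav", waveforms.squeeze(1).cpu(), 22050)
    return "speech.wav"

If the Gradio interface uses an audio output component, returning the .wav path keeps the rest of app.py unchanged.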
requirements.txt CHANGED
@@ -1,4 +1,3 @@
  gradio
- speechbrain==1.0.0
+ speechbrain
  torch
- soundfile
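If the torchaudio-based variant sketched above were adopted instead of torch.save, the dependency list would also need torchaudio; a hypothetical requirements.txt for that case:

gradio
speechbrain
torch
torchaudio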