miya3333 committed on
Commit
dc4f75a
·
verified ·
1 Parent(s): 881ca56

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +15 -9
  2. requirements.txt +1 -1
app.py CHANGED
@@ -1,23 +1,29 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
- import soundfile as sf
4
  import torch
 
 
5
 
6
- # モデルのロード (例: speechbrain/tts-hifigan-ljspeech)
7
- synthesizer = pipeline("text-to-speech", "speechbrain/tts-hifigan-ljspeech")
 
8
 
9
  # 推論関数の定義
10
  def synthesize_speech(text):
11
- with torch.no_grad():
12
- output = synthesizer(text)
13
- sf.write("speech.wav", output["audio"], output["sampling_rate"]) #numpy arrayからwavファイルに変換
14
- return "speech.wav"
 
 
 
 
 
15
 
16
  # Gradioインターフェースの作成
17
  iface = gr.Interface(
18
  fn=synthesize_speech,
19
  inputs=gr.Textbox(lines=5, label="Input Text"),
20
- outputs=gr.Audio(label="Output Audio"),
21
  title="TTS Demo",
22
  description="Enter text to synthesize speech."
23
  )
 
1
  import gradio as gr
 
 
2
  import torch
3
+ from speechbrain.pretrained import Tacotron2
4
+ from speechbrain.pretrained import HIFIGAN
5
 
6
+ # モデルのロード
7
+ hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
8
+ tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
9
 
10
  # 推論関数の定義
11
  def synthesize_speech(text):
12
+ # Tacotron2でmel spectrogramを生成
13
+ mel_output, _, _ = tacotron2.encode_text(text)
14
+
15
+ # HiFi-GANでmel spectrogramから音声を生成
16
+ waveforms = hifi_gan.decode_batch(mel_output)
17
+
18
+ # torch tensorをwavfileとして保存
19
+ torch.save(waveforms, "speech.pt")
20
+ return "speech.pt"
21
 
22
  # Gradioインターフェースの作成
23
  iface = gr.Interface(
24
  fn=synthesize_speech,
25
  inputs=gr.Textbox(lines=5, label="Input Text"),
26
+ outputs=gr.Audio(label="Output Audio", type="filepath"),
27
  title="TTS Demo",
28
  description="Enter text to synthesize speech."
29
  )
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
  gradio
2
- transformers
3
  torch
4
  soundfile
 
1
  gradio
2
+ speechbrain
3
  torch
4
  soundfile