Neomindapp committed on
Commit
5622ebb
·
verified ·
1 Parent(s): f3c0815

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -1,34 +1,48 @@
1
  import torch
2
  import gradio as gr
3
- from transformers import TTSConfig, TTSForConditionalGeneration
4
  import numpy as np
5
  import soundfile as sf
6
 
7
- # Load your model and configuration
 
 
 
 
 
 
 
 
8
  def load_model(model_path, config_path):
9
- config = TTSConfig.from_json_file(config_path)
10
- model = TTSForConditionalGeneration.from_pretrained(model_path, config=config)
 
 
 
 
 
 
 
11
  model.eval()
 
12
  return model
13
 
14
- # Define the path to your model and config
15
- MODEL_PATH = 'path/to/best_model.pth'
16
- CONFIG_PATH = 'path/to/config.json'
17
-
18
  model = load_model(MODEL_PATH, CONFIG_PATH)
19
 
20
  # Define the function to generate speech
21
  def generate_speech(text):
22
- # Convert text to input format
23
- inputs = tokenizer(text, return_tensors="pt")
 
24
 
25
  with torch.no_grad():
26
- # Generate speech
27
- outputs = model.generate(inputs['input_ids'])
28
 
29
- # Convert outputs to numpy array (audio waveform)
30
- # This conversion depends on your model
31
- audio_waveform = outputs.squeeze().numpy()
32
 
33
  # Save the waveform to a temporary file
34
  temp_file = 'temp.wav'
@@ -41,10 +55,10 @@ interface = gr.Interface(
41
  fn=generate_speech,
42
  inputs="text",
43
  outputs="audio",
44
- title="Text-to-Speech Model",
45
- description="Generate speech from text using your TTS model."
46
  )
47
 
48
  # Launch the Gradio interface
49
  if __name__ == "__main__":
50
- interface.launch()
 
1
  import torch
2
  import gradio as gr
3
+ import json
4
  import numpy as np
5
  import soundfile as sf
6
 
7
+ # Import your Glow-TTS model and related utilities
8
+ from glow_tts.models import GlowTTS
9
+ from glow_tts.utils import text_to_sequence, sequence_to_mel # Replace with actual functions if different
10
+
11
+ # Define paths to your model and configuration (relative paths)
12
+ MODEL_PATH = 'best_model.pth'
13
+ CONFIG_PATH = 'config.json'
14
+
15
+ # Load configuration and model
16
  def load_model(model_path, config_path):
17
+ # Load the model configuration
18
+ with open(config_path, 'r') as f:
19
+ config = json.load(f)
20
+
21
+ # Initialize the Glow-TTS model
22
+ model = GlowTTS(config)
23
+
24
+ # Load the trained model weights
25
+ model.load_state_dict(torch.load(model_path))
26
  model.eval()
27
+
28
  return model
29
 
30
+ # Load the model
 
 
 
31
  model = load_model(MODEL_PATH, CONFIG_PATH)
32
 
33
  # Define the function to generate speech
34
  def generate_speech(text):
35
+ # Convert text to sequence
36
+ sequence = text_to_sequence(text)
37
+ inputs = torch.tensor(sequence).unsqueeze(0) # Add batch dimension
38
 
39
  with torch.no_grad():
40
+ # Generate mel spectrogram from text sequence
41
+ mel_output = model(inputs)
42
 
43
+ # Convert mel spectrogram to waveform
44
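+ # Glow-TTS outputs a mel spectrogram, not audio, so a vocoder (e.g., HiFi-GAN) is needed to turn it into a waveform; NOTE: mel_to_audio is a placeholder that is not imported in the visible part of this file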
45
+ audio_waveform = mel_to_audio(mel_output) # Replace with actual conversion if needed
46
 
47
  # Save the waveform to a temporary file
48
  temp_file = 'temp.wav'
 
55
  fn=generate_speech,
56
  inputs="text",
57
  outputs="audio",
58
+ title="Glow-TTS Model",
59
+ description="Generate speech from text using the Glow-TTS model."
60
  )
61
 
62
  # Launch the Gradio interface
63
  if __name__ == "__main__":
64
+ interface.launch()