Neomindapp committed on
Commit
f9d0c83
·
verified ·
1 Parent(s): e68f823

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -52
app.py CHANGED
@@ -1,64 +1,34 @@
1
- import torch
2
  import gradio as gr
3
- import json
4
- import numpy as np
5
- import soundfile as sf
6
 
7
- # Import your Glow-TTS model and related utilities
8
- from glow_tts.models import GlowTTS
9
- from glow_tts.utils import text_to_sequence, sequence_to_mel # Replace with actual functions if different
 
10
 
11
- # Define paths to your model and configuration (relative paths)
12
- MODEL_PATH = 'best_model.pth'
13
- CONFIG_PATH = 'config.json'
14
 
15
- # Load configuration and model
16
- def load_model(model_path, config_path):
17
- # Load the model configuration
18
- with open(config_path, 'r') as f:
19
- config = json.load(f)
20
-
21
- # Initialize the Glow-TTS model
22
- model = GlowTTS(config)
23
-
24
- # Load the trained model weights
25
- model.load_state_dict(torch.load(model_path))
26
- model.eval()
27
-
28
- return model
29
-
30
- # Load the model
31
- model = load_model(MODEL_PATH, CONFIG_PATH)
32
-
33
- # Define the function to generate speech
34
  def generate_speech(text):
35
- # Convert text to sequence
36
- sequence = text_to_sequence(text)
37
- inputs = torch.tensor(sequence).unsqueeze(0) # Add batch dimension
38
 
39
- with torch.no_grad():
40
- # Generate mel spectrogram from text sequence
41
- mel_output = model(inputs)
42
-
43
- # Convert mel spectrogram to waveform
44
- # This step might require a vocoder (e.g., HiFi-GAN) to convert mel spectrograms to audio
45
- audio_waveform = mel_to_audio(mel_output) # Replace with actual conversion if needed
46
-
47
- # Save the waveform to a temporary file
48
- temp_file = 'temp.wav'
49
- sf.write(temp_file, audio_waveform, 22050) # Adjust sample rate if necessary
50
 
51
- return temp_file
 
 
52
 
53
- # Define Gradio interface
54
- interface = gr.Interface(
55
  fn=generate_speech,
56
- inputs="text",
57
- outputs="audio",
58
- title="Glow-TTS Model",
59
- description="Generate speech from text using the Glow-TTS model."
60
  )
61
 
62
- # Launch the Gradio interface
63
  if __name__ == "__main__":
64
- interface.launch()
 
 
1
  import gradio as gr
2
+ import torch
3
+ from your_model_module import YourTTSModel, YourTTSProcessor # Replace with your actual imports
 
4
 
5
+ # Load the model and processor
6
+ model = YourTTSModel.from_pretrained("config.json")
7
+ model.load_state_dict(torch.load("best_model.pth"))
8
+ model.eval() # Set the model to evaluation mode
9
 
10
+ processor = YourTTSProcessor.from_pretrained("config.json")
 
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def generate_speech(text):
13
+ # Process the input text
14
+ inputs = processor(text, return_tensors="pt")
 
15
 
16
+ # Generate speech using the model
17
+ with torch.no_grad(): # No need to compute gradients
18
+ outputs = model.generate(**inputs)
 
 
 
 
 
 
 
 
19
 
20
+ # Process the output to an audio format
21
+ audio = outputs.squeeze().numpy() # Adjust this based on how your model outputs data
22
+ return audio
23
 
24
+ # Define the Gradio interface
25
+ iface = gr.Interface(
26
  fn=generate_speech,
27
+ inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
28
+ outputs=gr.Audio(type="numpy"),
29
+ title="Text-to-Speech with Coqui TTS",
30
+ description="Generate speech from text using a custom Coqui TTS model."
31
  )
32
 
 
33
  if __name__ == "__main__":
34
+ iface.launch()