sablab committed on
Commit
8a7a5ec
·
verified ·
1 Parent(s): 1a1e909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -3,17 +3,17 @@ import torch
3
  from transformers import VitsModel, VitsTokenizer
4
 
5
  # --- 1. Load Model and Tokenizer ---
6
- # Load the pretrained model and tokenizer from Hugging Face.
7
- # This is done once when the app starts, not for every prediction.
8
- print("Loading F5-TTS model and tokenizer...")
9
- model = VitsModel.from_pretrained("SWivid/F5-TTS")
10
- tokenizer = VitsTokenizer.from_pretrained("SWivid/F5-TTS")
11
  print("Model and tokenizer loaded successfully.")
12
 
13
  # --- 2. Define the Speech Synthesis Function ---
14
  def synthesize_speech(text):
15
  """
16
- Converts text to speech using the F5-TTS model.
17
  """
18
  # Tokenize the input text. The `return_tensors="pt"` part formats it for PyTorch.
19
  inputs = tokenizer(text, return_tensors="pt")
@@ -39,14 +39,14 @@ demo = gr.Interface(
39
  inputs=gr.Textbox(
40
  label="Text to Synthesize",
41
  info="Enter the text you want to convert to speech.",
42
- value="Hello, this is a demonstration of the F5 text to speech model."
43
  ),
44
  outputs=gr.Audio(
45
  label="Synthesized Audio",
46
  type="numpy" # The function returns a NumPy array
47
  ),
48
- title="🗣️ F5-TTS Text-to-Speech",
49
- description="A simple Gradio app to run the `SWivid/F5-TTS` model for text-to-speech conversion. Built by Gemini.",
50
  examples=[
51
  ["The quick brown fox jumps over the lazy dog."],
52
  ["To be, or not to be, that is the question."],
 
3
  from transformers import VitsModel, VitsTokenizer
4
 
5
  # --- 1. Load Model and Tokenizer ---
6
+ # NOTE: Switched to a compatible model that has the correct file structure.
7
+ print("Loading facebook/mms-tts-eng model and tokenizer...")
8
+ model_id = "facebook/mms-tts-eng"
9
+ model = VitsModel.from_pretrained(model_id)
10
+ tokenizer = VitsTokenizer.from_pretrained(model_id)
11
  print("Model and tokenizer loaded successfully.")
12
 
13
  # --- 2. Define the Speech Synthesis Function ---
14
  def synthesize_speech(text):
15
  """
16
+ Converts text to speech using the selected TTS model.
17
  """
18
  # Tokenize the input text. The `return_tensors="pt"` part formats it for PyTorch.
19
  inputs = tokenizer(text, return_tensors="pt")
 
39
  inputs=gr.Textbox(
40
  label="Text to Synthesize",
41
  info="Enter the text you want to convert to speech.",
42
+ value="Hello, this is a demonstration of the Facebook MMS text to speech model."
43
  ),
44
  outputs=gr.Audio(
45
  label="Synthesized Audio",
46
  type="numpy" # The function returns a NumPy array
47
  ),
48
+ title="🗣️ MMS Text-to-Speech (English)",
49
+ description="A Gradio app to run the `facebook/mms-tts-eng` model for text-to-speech conversion.",
50
  examples=[
51
  ["The quick brown fox jumps over the lazy dog."],
52
  ["To be, or not to be, that is the question."],