Spaces:

sablab
/

F5

Sleeping

sablab committed on Jul 16

Commit

8a7a5ec

verified ·

1 Parent(s): 1a1e909

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,17 +3,17 @@ import torch
 from transformers import VitsModel, VitsTokenizer
 # --- 1. Load Model and Tokenizer ---
-# Load the pretrained model and tokenizer from Hugging Face.
-# This is done once when the app starts, not for every prediction.
-print("Loading F5-TTS model and tokenizer...")
-model = VitsModel.from_pretrained("SWivid/F5-TTS")
-tokenizer = VitsTokenizer.from_pretrained("SWivid/F5-TTS")
 print("Model and tokenizer loaded successfully.")
 # --- 2. Define the Speech Synthesis Function ---
 def synthesize_speech(text):
     """
-    Converts text to speech using the F5-TTS model.
     """
     # Tokenize the input text. The `return_tensors="pt"` part formats it for PyTorch.
     inputs = tokenizer(text, return_tensors="pt")
@@ -39,14 +39,14 @@ demo = gr.Interface(
     inputs=gr.Textbox(
         label="Text to Synthesize",
         info="Enter the text you want to convert to speech.",
-        value="Hello, this is a demonstration of the F5 text to speech model."
     ),
     outputs=gr.Audio(
         label="Synthesized Audio",
         type="numpy"  # The function returns a NumPy array
     ),
-    title="🗣️ F5-TTS Text-to-Speech",
-    description="A simple Gradio app to run the `SWivid/F5-TTS` model for text-to-speech conversion. Built by Gemini.",
     examples=[
         ["The quick brown fox jumps over the lazy dog."],
         ["To be, or not to be, that is the question."],

 from transformers import VitsModel, VitsTokenizer
 # --- 1. Load Model and Tokenizer ---
+# NOTE: Switched to a compatible model that has the correct file structure.
+print("Loading facebook/mms-tts-eng model and tokenizer...")
+model_id = "facebook/mms-tts-eng"
+model = VitsModel.from_pretrained(model_id)
+tokenizer = VitsTokenizer.from_pretrained(model_id)
 print("Model and tokenizer loaded successfully.")
 # --- 2. Define the Speech Synthesis Function ---
 def synthesize_speech(text):
     """
+    Converts text to speech using the selected TTS model.
     """
     # Tokenize the input text. The `return_tensors="pt"` part formats it for PyTorch.
     inputs = tokenizer(text, return_tensors="pt")
     inputs=gr.Textbox(
         label="Text to Synthesize",
         info="Enter the text you want to convert to speech.",
+        value="Hello, this is a demonstration of the Facebook MMS text to speech model."
     ),
     outputs=gr.Audio(
         label="Synthesized Audio",
         type="numpy"  # The function returns a NumPy array
     ),
+    title="🗣️ MMS Text-to-Speech (English)",
+    description="A Gradio app to run the `facebook/mms-tts-eng` model for text-to-speech conversion.",
     examples=[
         ["The quick brown fox jumps over the lazy dog."],
         ["To be, or not to be, that is the question."],