Spaces:

Vinay15
/

Fine-tuning_TTS_for_a_Regional_Language

Runtime error

App Files Community

Vinay15 committed on Oct 24, 2024

Commit

bdbc025

verified ·

1 Parent(s): c1db09f

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -5

app.py CHANGED Viewed

@@ -3,8 +3,8 @@ import torch
 from datasets import load_dataset
 from transformers import SpeechT5Processor, SpeechT5HifiGan, SpeechT5ForTextToSpeech
-# Load the fine-tuned model and vocoder for Italian from the new model ID
-model_id = "Vinay15/speecht5_finetuned_voxpopuli_it"
 model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
@@ -12,7 +12,7 @@ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
-# Load processor for the new Italian model
 processor = SpeechT5Processor.from_pretrained(model_id)
 # Optional: Text cleanup for Italian-specific characters
@@ -27,7 +27,7 @@ replacements = [
 # Text-to-speech synthesis function
 def synthesize_speech(text):
-    # Clean up text for Italian-specific accents
     for src, dst in replacements:
         text = text.replace(src, dst)
@@ -47,6 +47,20 @@ This demo generates speech in Italian using the fine-tuned SpeechT5 model from H
 The model is fine-tuned on the VoxPopuli Italian dataset.
 """
 # Create Gradio interface
 interface = gr.Interface(
     fn=synthesize_speech,
@@ -54,7 +68,7 @@ interface = gr.Interface(
     outputs=gr.Audio(label="Generated Speech"),
     title=title,
     description=description,
-    examples=["Questa è una dimostrazione di sintesi vocale in italiano."]
 )
 # Launch the interface

 from datasets import load_dataset
 from transformers import SpeechT5Processor, SpeechT5HifiGan, SpeechT5ForTextToSpeech
+# Load the fine-tuned model and vocoder for Italian
+model_id = "Sandiago21/speecht5_finetuned_voxpopuli_it"
 model = SpeechT5ForTextToSpeech.from_pretrained(model_id)
 vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
 embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 speaker_embeddings = torch.tensor(embeddings_dataset[7440]["xvector"]).unsqueeze(0)
+# Load processor for the Italian model
 processor = SpeechT5Processor.from_pretrained(model_id)
 # Optional: Text cleanup for Italian-specific characters
 # Text-to-speech synthesis function
 def synthesize_speech(text):
+    # Clean up text
     for src, dst in replacements:
         text = text.replace(src, dst)
 The model is fine-tuned on the VoxPopuli Italian dataset.
 """
+# More examples of Italian text
+examples = [
+    "Questa è una dimostrazione di sintesi vocale in italiano.",
+    "La tecnologia della sintesi vocale sta avanzando rapidamente.",
+    "Oggi il tempo è sereno con una leggera brezza.",
+    "Mi chiamo Maria e sto imparando a usare il Text-to-Speech.",
+    "L'intelligenza artificiale cambierà il futuro della comunicazione.",
+    "Benvenuti a Roma, la città eterna, ricca di storia e cultura.",
+    "Spero che questo modello di sintesi vocale possa essere utile per molte applicazioni.",
+    "La pizza è uno dei piatti italiani più famosi al mondo.",
+    "Il mare in estate è calmo e limpido.",
+    "L'inverno in montagna è freddo e coperto di neve."
+]
 # Create Gradio interface
 interface = gr.Interface(
     fn=synthesize_speech,
     outputs=gr.Audio(label="Generated Speech"),
     title=title,
     description=description,
+    examples=examples
 )
 # Launch the interface