Spaces:

KIMOSSINO
/

multilingual

Sleeping

App Files Files Community

KIMOSSINO committed on Dec 7, 2024

Commit

9f9087b

verified ·

1 Parent(s): eb0019e

Create app.py

Browse files

Files changed (1) hide show

app.py +119 -0

app.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import gradio as gr
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+import torch
+import scipy
+# Load models and processor
+processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+# Speaker embeddings
+speaker_embeddings = torch.randn(1, 512)  # Random speaker embedding
+LANGUAGES = {
+    "English": "en",
+    "French": "fr",
+    "Spanish": "es"
+}
+def text_to_speech(text, language, speaker_type, speed):
+    try:
+        # Adjust speaker embeddings based on speaker type
+        if speaker_type == "Female":
+            speaker_embeddings = torch.randn(1, 512) * 0.8
+        else:
+            speaker_embeddings = torch.randn(1, 512) * 1.2
+        # Process input text
+        inputs = processor(text=text, return_tensors="pt")
+        # Generate speech
+        speech = model.generate_speech(
+            inputs["input_ids"],
+            speaker_embeddings,
+            vocoder=vocoder
+        )
+        # Adjust speed
+        if speed != 1.0:
+            speech = torch.nn.functional.interpolate(
+                speech.unsqueeze(0).unsqueeze(0),
+                scale_factor=1/speed,
+                mode='linear',
+                align_corners=False
+            ).squeeze()
+        # Convert to numpy array
+        speech = speech.numpy()
+        return (24000, speech)  # 24kHz sampling rate
+    except Exception as e:
+        print(f"Error in text_to_speech: {str(e)}")
+        return None
+# Create Gradio interface
+def create_interface():
+    with gr.Blocks(theme=gr.themes.Soft(
+        primary_hue="blue",
+        secondary_hue="gray",
+    )) as demo:
+        gr.Markdown(
+            """
+            # 🎙️ Multilingual Text-to-Speech
+            Convert text to natural-sounding speech in multiple languages.
+            """
+        )
+        with gr.Row():
+            with gr.Column():
+                text_input = gr.Textbox(
+                    label="Enter Text",
+                    placeholder="Type your text here...",
+                    lines=5
+                )
+                language = gr.Dropdown(
+                    choices=list(LANGUAGES.keys()),
+                    value="English",
+                    label="Language"
+                )
+                speaker = gr.Radio(
+                    choices=["Male", "Female"],
+                    value="Male",
+                    label="Speaker Gender"
+                )
+                speed = gr.Slider(
+                    minimum=0.5,
+                    maximum=2.0,
+                    value=1.0,
+                    step=0.1,
+                    label="Speech Speed"
+                )
+                submit_btn = gr.Button("Generate Speech", variant="primary")
+            with gr.Column():
+                audio_output = gr.Audio(
+                    label="Generated Speech",
+                    type="numpy"
+                )
+        submit_btn.click(
+            fn=text_to_speech,
+            inputs=[text_input, language, speaker, speed],
+            outputs=audio_output
+        )
+        gr.Markdown(
+            """
+            ### Features:
+            - Support for English, French, and Spanish
+            - Male and Female voice options
+            - Adjustable speech speed
+            - High-quality, natural-sounding voices
+            """
+        )
+    return demo
+# Build the UI when the module is executed and start the Gradio server.
+# Both statements run at import time; there is no __main__ guard.
+demo = create_interface()
+demo.launch()