Spaces:

Vishwas1
/

KittenTTSDemo

Running

App Files Files Community

Vishwas1 committed 16 days ago

Commit

e327671

verified ·

1 Parent(s): 3f13e9e

Upload 5 files

Browse files

Files changed (4) hide show

README.md +2 -2
app.py +3 -3
app_simple.py +96 -0
requirements.txt +1 -1

README.md CHANGED Viewed

@@ -4,8 +4,8 @@ emoji: 🎤
 colorFrom: blue
 colorTo: purple
 sdk: gradio
-sdk_version: 4.44.0
-app_file: app.py
 pinned: false
 license: mit
 ---

 colorFrom: blue
 colorTo: purple
 sdk: gradio
+sdk_version: 4.44.1
+app_file: app_simple.py
 pinned: false
 license: mit
 ---

app.py CHANGED Viewed

@@ -138,8 +138,7 @@ def create_demo():
                 ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
                 ["This model works without requiring a GPU.", "expr-voice-3-m"],
             ],
-            inputs=[text_input, voice_dropdown],
-            label="Try these examples:"
         )
         # Footer
@@ -172,5 +171,6 @@ if __name__ == "__main__":
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
-        share=False
     )

                 ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
                 ["This model works without requiring a GPU.", "expr-voice-3-m"],
             ],
+            inputs=[text_input, voice_dropdown]
         )
         # Footer
     demo.launch(
         server_name="0.0.0.0",
         server_port=7860,
+        share=True,
+        debug=False
     )

app_simple.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import gradio as gr
+import soundfile as sf
+import numpy as np
+from kittentts import KittenTTS
+# Load the KittenTTS model once at import time; generate_speech() reuses this single instance
+model = KittenTTS("KittenML/kitten-tts-nano-0.1")
+# Voice identifiers offered in the dropdown (the UI help panel groups the -m names as male and the -f names as female)
+AVAILABLE_VOICES = [
+    'expr-voice-2-m', 'expr-voice-2-f', 'expr-voice-3-m', 'expr-voice-3-f',
+    'expr-voice-4-m', 'expr-voice-4-f', 'expr-voice-5-m', 'expr-voice-5-f'
+]
+def generate_speech(text, voice):
+    """Generate speech from text using KittenTTS"""
+    if not text.strip():
+        return None, "Please enter some text to generate speech."
+    try:
+        # Generate audio
+        audio = model.generate(text, voice=voice)
+        # Convert to the format expected by Gradio
+        if len(audio.shape) > 1:
+            audio = audio.mean(axis=1)  # Convert stereo to mono if needed
+        # Normalize audio
+        audio = audio / np.max(np.abs(audio)) if np.max(np.abs(audio)) > 0 else audio
+        return audio, f"✅ Successfully generated speech with voice: {voice}"
+    except Exception as e:
+        return None, f"❌ Error generating speech: {str(e)}"
+# Build the Gradio Blocks UI: header, input column + voice help panel, then audio and status outputs
+with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo:
+    gr.HTML("""
+    <div style="text-align: center; margin-bottom: 2rem;">
+        <h1>🎤 KittenTTS</h1>
+        <p><em>High Quality Text-to-Speech Generation</em></p>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column():  # left column: text box, voice selector, generate button
+            text_input = gr.Textbox(
+                label="Enter your text",
+                placeholder="Type or paste your text here...",
+                lines=4
+            )
+            voice_dropdown = gr.Dropdown(
+                choices=AVAILABLE_VOICES,
+                value=AVAILABLE_VOICES[1],  # default selection is the second entry of AVAILABLE_VOICES
+                label="Select Voice"
+            )
+            generate_btn = gr.Button("🎵 Generate Speech", variant="primary")
+        with gr.Column():  # right column: static help panel listing the voice names
+            gr.HTML("""
+            <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
+                <h3>Available Voices:</h3>
+                <ul>
+                    <li><strong>Male:</strong> expr-voice-2-m, expr-voice-3-m, expr-voice-4-m, expr-voice-5-m</li>
+                    <li><strong>Female:</strong> expr-voice-2-f, expr-voice-3-f, expr-voice-4-f, expr-voice-5-f</li>
+                </ul>
+            </div>
+            """)
+    audio_output = gr.Audio(label="Generated Audio")  # declared outside the Row, directly under the Blocks root
+    status_output = gr.Textbox(label="Status", interactive=False)  # read-only message returned by generate_speech
+    # Clicking the button runs generate_speech on the current text and voice
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=[text_input, voice_dropdown],
+        outputs=[audio_output, status_output]
+    )
+    # Submitting the textbox triggers the same handler with the same inputs and outputs as the button
+    text_input.submit(
+        fn=generate_speech,
+        inputs=[text_input, voice_dropdown],
+        outputs=[audio_output, status_output]
+    )
+# Start the Gradio server only when this file is run as a script (not when imported)
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",  # listen on all network interfaces
+        server_port=7860,
+        share=True  # NOTE(review): app.py used share=False before this commit -- confirm a share link is wanted here
+    )

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-gradio>=4.44.0
 https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 soundfile
 numpy

+gradio==4.44.1
 https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
 soundfile
 numpy