Vishwas1 committed on
Commit
e327671
·
verified ·
1 Parent(s): 3f13e9e

Upload 5 files

Browse files
Files changed (4) hide show
  1. README.md +2 -2
  2. app.py +3 -3
  3. app_simple.py +96 -0
  4. requirements.txt +1 -1
README.md CHANGED
@@ -4,8 +4,8 @@ emoji: 🎤
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 4.44.0
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
 
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.44.1
8
+ app_file: app_simple.py
9
  pinned: false
10
  license: mit
11
  ---
app.py CHANGED
@@ -138,8 +138,7 @@ def create_demo():
138
  ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
139
  ["This model works without requiring a GPU.", "expr-voice-3-m"],
140
  ],
141
- inputs=[text_input, voice_dropdown],
142
- label="Try these examples:"
143
  )
144
 
145
  # Footer
@@ -172,5 +171,6 @@ if __name__ == "__main__":
172
  demo.launch(
173
  server_name="0.0.0.0",
174
  server_port=7860,
175
- share=False
 
176
  )
 
138
  ["Welcome to our high-quality text-to-speech system.", "expr-voice-3-f"],
139
  ["This model works without requiring a GPU.", "expr-voice-3-m"],
140
  ],
141
+ inputs=[text_input, voice_dropdown]
 
142
  )
143
 
144
  # Footer
 
171
  demo.launch(
172
  server_name="0.0.0.0",
173
  server_port=7860,
174
+ share=True,
175
+ debug=False
176
  )
app_simple.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import soundfile as sf
3
+ import numpy as np
4
+ from kittentts import KittenTTS
5
+
6
+ # Initialize the model
7
+ model = KittenTTS("KittenML/kitten-tts-nano-0.1")
8
+
9
+ # Available voices
10
# Voice identifiers offered in the dropdown: voices 2 through 5, each listed
# as its male ("m") variant followed by its female ("f") variant.
AVAILABLE_VOICES = [
    f"expr-voice-{number}-{variant}"
    for number in range(2, 6)
    for variant in ("m", "f")
]
14
+
15
# Sample rate (Hz) reported to Gradio together with the samples.
# NOTE(review): 24 kHz is the rate the KittenTTS README uses when saving
# generated audio -- confirm if the model checkpoint changes.
KITTEN_TTS_SAMPLE_RATE = 24000


def generate_speech(text, voice):
    """Generate speech from text using KittenTTS.

    Args:
        text: Text to synthesize. ``None``, empty and whitespace-only values
            are rejected with a prompt instead of being sent to the model.
        voice: Voice identifier forwarded to ``model.generate``.

    Returns:
        An ``(audio, status)`` pair for the audio and status outputs.
        ``audio`` is ``(sample_rate, samples)`` with mono, peak-normalized
        samples on success, or ``None`` when nothing was generated.
        ``status`` is a user-facing message describing the outcome.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise instead of showing the prompt.
    if not text or not text.strip():
        return None, "Please enter some text to generate speech."

    try:
        # Generate audio
        audio = np.asarray(model.generate(text, voice=voice))

        # Drop singleton axes first so (1, n) or (n, 1) output becomes 1-D
        # rather than being averaged down to a single sample.
        audio = np.atleast_1d(np.squeeze(audio))
        if audio.ndim > 1:
            audio = audio.mean(axis=1)  # Convert stereo to mono if needed

        if audio.size == 0:
            # np.max on an empty array raises a cryptic reduction error;
            # report the real problem instead.
            return None, "❌ Error generating speech: the model returned no audio."

        # Normalize to a peak of 1.0; silent output is left untouched to
        # avoid dividing by zero.
        peak = np.max(np.abs(audio))
        if peak > 0:
            audio = audio / peak

        # gr.Audio accepts (sample_rate, samples); a bare array is rejected.
        return (
            (KITTEN_TTS_SAMPLE_RATE, audio),
            f"✅ Successfully generated speech with voice: {voice}",
        )

    except Exception as e:
        # UI boundary: surface the failure in the status box rather than
        # crashing the request.
        return None, f"❌ Error generating speech: {str(e)}"
35
+
36
+ # Create the interface
37
# NOTE(review): the source of this block had lost its indentation, so the
# nesting below (outputs placed under the two-column row) is reconstructed --
# confirm against the intended layout.
with gr.Blocks(title="KittenTTS - High Quality Text-to-Speech") as demo:

    # Page banner.
    gr.HTML("""
    <div style="text-align: center; margin-bottom: 2rem;">
        <h1>🎤 KittenTTS</h1>
        <p><em>High Quality Text-to-Speech Generation</em></p>
    </div>
    """)

    with gr.Row():
        # First column: the inputs passed to generate_speech.
        with gr.Column():
            text_input = gr.Textbox(
                lines=4,
                label="Enter your text",
                placeholder="Type or paste your text here...",
            )

            voice_dropdown = gr.Dropdown(
                label="Select Voice",
                choices=AVAILABLE_VOICES,
                value=AVAILABLE_VOICES[1],
            )

            generate_btn = gr.Button("🎵 Generate Speech", variant="primary")

        # Second column: static reference list of the voice identifiers.
        with gr.Column():
            gr.HTML("""
            <div style="background: #f0f0f0; padding: 1rem; border-radius: 8px;">
                <h3>Available Voices:</h3>
                <ul>
                    <li><strong>Male:</strong> expr-voice-2-m, expr-voice-3-m, expr-voice-4-m, expr-voice-5-m</li>
                    <li><strong>Female:</strong> expr-voice-2-f, expr-voice-3-f, expr-voice-4-f, expr-voice-5-f</li>
                </ul>
            </div>
            """)

    audio_output = gr.Audio(label="Generated Audio")
    status_output = gr.Textbox(label="Status", interactive=False)

    # The button click and pressing Enter in the text box run the same
    # handler with the same inputs and outputs, so the wiring is shared.
    speech_event = {
        "fn": generate_speech,
        "inputs": [text_input, voice_dropdown],
        "outputs": [audio_output, status_output],
    }
    generate_btn.click(**speech_event)
    text_input.submit(**speech_event)
89
+
90
+ # Launch the demo
91
if __name__ == "__main__":
    # Serve on all network interfaces at port 7860, with share=True.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    demo.launch(**launch_options)
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- gradio>=4.44.0
2
  https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
3
  soundfile
4
  numpy
 
1
+ gradio==4.44.1
2
  https://github.com/KittenML/KittenTTS/releases/download/0.1/kittentts-0.1.0-py3-none-any.whl
3
  soundfile
4
  numpy