sudoping01 committed on
Commit
3f89022
·
verified ·
1 Parent(s): 13036ad

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -0
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import os
4
+ import spaces
5
+ from huggingface_hub import login
6
+ from maliba_ai.tts.inference import BambaraTTSInference
7
+ from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
8
+
9
+
10
# Authenticate with the Hugging Face Hub only when a token is supplied through
# the environment; without one the login step is skipped entirely.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)


# The model is instantiated once at import time and shared by every request.
print("Loading Bambara TTS model...")
tts = BambaraTTSInference()
print("Model loaded successfully!")


# Maps the name shown in the speaker dropdown to the speaker object that is
# handed to the TTS model.
SPEAKERS = {
    "Adame": Adame,
    "Moussa": Moussa,
    "Bourama": Bourama,
    "Modibo": Modibo,
    "Seydou": Seydou,
}
27
+
28
def validate_inputs(text, temperature, top_k, top_p, max_tokens):
    """Validate the text and generation parameters supplied by the user.

    Args:
        text: Text to synthesise; must contain at least one non-whitespace
            character.
        temperature: Sampling temperature; accepted range is 0.001 to 1.
        top_k: Top-K value; accepted range is 1 to 100.
        top_p: Top-P value; accepted range is 0.1 to 1.0.
        max_tokens: Maximum generation length; must be at least 1.

    Returns:
        A ``(is_valid, message)`` tuple. ``message`` is an empty string when
        the inputs are valid, otherwise a human-readable description of the
        first check that failed.
    """
    if not text or not text.strip():
        return False, "Please enter some Bambara text."

    # The message now states the bounds that are actually enforced (the
    # previous wording, "must be between positive", gave the user no range).
    if not (0.001 <= temperature <= 1):
        return False, "Temperature must be between 0.001 and 1"

    if not (1 <= top_k <= 100):
        return False, "Top-K must be between 1 and 100"

    if not (0.1 <= top_p <= 1.0):
        return False, "Top-P must be between 0.1 and 1.0"

    # max_tokens was accepted but never checked; a missing or non-positive
    # generation length cannot produce audio, so reject it up front.
    if max_tokens is None or max_tokens < 1:
        return False, "Max Length must be at least 1"

    return True, ""
43
+
44
@spaces.GPU()
def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p, max_tokens):
    """Synthesise speech for ``text`` with the selected speaker.

    Args:
        text: Text entered by the user.
        speaker_name: Key into ``SPEAKERS`` selecting the voice.
        use_advanced: When true, the sampling parameters below are validated
            and forwarded to the model; otherwise the model is called with
            only the text and speaker.
        temperature: Sampling temperature (advanced mode only).
        top_k: Top-K value, converted to ``int`` (advanced mode only).
        top_p: Top-P value (advanced mode only).
        max_tokens: Passed as ``max_new_audio_tokens`` after conversion to
            ``int`` (advanced mode only).

    Returns:
        A ``(audio, status)`` tuple. ``audio`` is ``(16000, waveform)`` on
        success and ``None`` on any failure; ``status`` is a message for the
        status box.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise before the error handling below could catch it.
    if not text or not text.strip():
        return None, "Please enter some Bambara text."

    # Report an unknown speaker explicitly instead of surfacing a bare
    # KeyError repr through the generic handler.
    speaker = SPEAKERS.get(speaker_name)
    if speaker is None:
        return None, f"❌ Unknown speaker: {speaker_name}"

    try:
        if use_advanced:
            is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
            if not is_valid:
                return None, f"❌ {error_msg}"

            waveform = tts.generate_speech(
                text=text.strip(),
                speaker_id=speaker,
                temperature=temperature,
                top_k=int(top_k),
                top_p=top_p,
                max_new_audio_tokens=int(max_tokens),
            )
        else:
            waveform = tts.generate_speech(
                text=text.strip(),
                speaker_id=speaker,
            )

        # presumably waveform is a NumPy-like array exposing .size — confirm
        # against BambaraTTSInference.generate_speech.
        if waveform is None or waveform.size == 0:
            return None, "Failed to generate audio. Please try again."

        sample_rate = 16000
        return (sample_rate, waveform), "✅ Audio generated successfully"

    except Exception as e:
        # UI boundary: turn any failure into a status message rather than
        # letting the exception escape to the Gradio event handler.
        return None, f"❌ Error: {str(e)}"
81
+
82
+
83
# Sample [text, speaker name] pairs offered as one-click examples in the UI.
# Each speaker name must be a key of SPEAKERS.
examples = [
    # Short phrases
    ["Aw ni ce", "Adame"],
    ["I ni ce", "Moussa"],
    ["Aw ni tile", "Bourama"],
    ["I ka kene wa?", "Modibo"],
    ["Ala ka Mali suma", "Adame"],

    # Long single sentence
    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ", "Seydou"],

    # Medium-length and multi-sentence inputs
    ["Aw ni ce. Ne tɔgɔ ye Kaya Magan. Aw Sanbe Sanbe.", "Moussa"],
    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Bourama"],
    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Modibo"],
]
98
+
99
# Build the Gradio interface: a text/speaker input column, an optional
# advanced-settings column, the audio output, example shortcuts, and the
# event wiring that connects them to generate_speech.
with gr.Blocks(title="Bambara TTS - EXPERIMENTAL", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🎤 Bambara Text-to-Speech ⚠️ EXPERIMENTAL

    Convert Bambara text to speech using AI. This model is currently experimental.

    **Bambara** is spoken by millions of people in Mali and West Africa.
    """)

    with gr.Row():
        with gr.Column(scale=2):
            # Input section
            text_input = gr.Textbox(
                label="📝 Bambara Text",
                placeholder="Type your Bambara text here...",
                lines=3,
                max_lines=6,
                value="Aw ni ce",
            )

            speaker_dropdown = gr.Dropdown(
                choices=list(SPEAKERS.keys()),
                value="Adame",
                label="🗣️ Speaker Voice",
            )

            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

        with gr.Column(scale=1):
            use_advanced = gr.Checkbox(
                label="⚙️ Use Advanced Settings",
                value=False,
                info="Enable to customize generation parameters",
            )

            # Hidden until the checkbox above is ticked (see toggle_advanced).
            with gr.Group(visible=False) as advanced_group:
                gr.Markdown("**Advanced Parameters:**")

                # The upper bound is 1.0 because validate_inputs rejects any
                # temperature above 1; the previous maximum of 2.0 exposed
                # values that could only ever produce a validation error.
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.8,
                    step=0.1,
                    label="Temperature",
                    info="Higher = more varied",
                )

                top_k = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=50,
                    step=5,
                    label="Top-K",
                )

                top_p = gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top-P",
                )

                max_tokens = gr.Slider(
                    minimum=256,
                    maximum=4096,
                    value=2048,
                    step=256,
                    label="Max Length",
                )

    # NOTE(review): the output section is assumed to sit below the two-column
    # row rather than inside the right-hand column — confirm intended layout.
    gr.Markdown("### 🔊 Generated Audio")

    audio_output = gr.Audio(
        label="Generated Speech",
        type="numpy",
        interactive=False,
    )

    status_output = gr.Textbox(
        label="Status",
        interactive=False,
        show_label=False,
        container=False,
    )

    with gr.Accordion("📚 Try These Examples", open=True):
        def load_example(text, speaker):
            """Return component values for an example: text, speaker, and default settings."""
            return text, speaker, False, 0.8, 50, 0.9, 2048

        gr.Markdown("**Click any example below:**")

        for text, speaker in examples:
            btn = gr.Button(f"🎯 {text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
            btn.click(
                # Default arguments bind the current example; a plain closure
                # would see only the last loop values (late binding).
                fn=lambda t=text, s=speaker: load_example(t, s),
                outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
            )

    # Information section
    with gr.Accordion("ℹ️ About", open=False):
        gr.Markdown("""
        **⚠️ This is an experimental Bambara TTS model.**

        """)

    def toggle_advanced(use_adv):
        """Show or hide the advanced-parameters group to match the checkbox."""
        return gr.Group(visible=use_adv)

    use_advanced.change(
        fn=toggle_advanced,
        inputs=[use_advanced],
        outputs=[advanced_group],
    )

    # Both the button and pressing Enter in the textbox trigger generation.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
        outputs=[audio_output, status_output],
    )

    text_input.submit(
        fn=generate_speech,
        inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
        outputs=[audio_output, status_output],
    )
228
+
229
# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    # Listen on every interface at port 7860, with no public share link.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)