sudoping01 committed on
Commit 3447474 · verified · 1 parent: 254ef61

Update app.py

Files changed (1): app.py (+192 -183)
app.py CHANGED
@@ -1,59 +1,72 @@
+ import os
+
+ os.environ["TORCHDYNAMO_DISABLE"] = "1"
+ os.environ["TORCH_COMPILE_DISABLE"] = "1"
+ os.environ["PYTORCH_DISABLE_CUDNN_BENCHMARK"] = "1"
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
+ import torch
  import gradio as gr
  import numpy as np
- import os
  import spaces
- import sys
+ import logging
  from huggingface_hub import login


+ torch._dynamo.config.disable = True
+ torch._dynamo.config.suppress_errors = True
+
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
  hf_token = os.getenv("HF_TOKEN")
  if hf_token:
      login(token=hf_token)


- try:
-     from maliba_ai.tts.inference import BambaraTTSInference
-     from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
-
+ tts_model = None
+ speakers_dict = None
+ model_initialized = False

-     print("Loading Bambara TTS model...")
-     tts = BambaraTTSInference()
-     print("Model loaded successfully!")
+ @spaces.GPU()
+ def initialize_model():
+     """Initialize the TTS model and speakers - called once with GPU context"""
+     global tts_model, speakers_dict, model_initialized

-     MODEL_LOADED = True
- except Exception as e:
-     print(f"Error loading model: {e}")
-     MODEL_LOADED = False
-     tts = None
-
+     if not model_initialized:
+         logger.info("Initializing Bambara TTS model...")
+
+         try:
+             from maliba_ai.tts.inference import BambaraTTSInference
+             from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
+

- if MODEL_LOADED:
-     SPEAKERS = {
-         "Adame": Adame,
-         "Moussa": Moussa,
-         "Bourama": Bourama,
-         "Modibo": Modibo,
-         "Seydou": Seydou
-     }
- else:
-     SPEAKERS = {
-         "Adame": "Adame",
-         "Moussa": "Moussa",
-         "Bourama": "Bourama",
-         "Modibo": "Modibo",
-         "Seydou": "Seydou"
-     }
+             tts_model = BambaraTTSInference()
+
+             speakers_dict = {
+                 "Adame": Adame,
+                 "Moussa": Moussa,
+                 "Bourama": Bourama,
+                 "Modibo": Modibo,
+                 "Seydou": Seydou
+             }
+
+             model_initialized = True
+             logger.info("Model initialized successfully!")
+
+         except Exception as e:
+             logger.error(f"Failed to initialize model: {e}")
+             raise e
+
+     return tts_model, speakers_dict

  def validate_inputs(text, temperature, top_k, top_p, max_tokens):
-     """Validate user inputs"""
      if not text or not text.strip():
          return False, "Please enter some Bambara text."

-     if len(text.strip()) > 1000:
-         return False, "Text is too long. Please use shorter text (max 1000 characters)."
-
-     if not (0.1 <= temperature <= 2.0):
-         return False, "Temperature must be between 0.1 and 2.0"
+     if not (0.001 <= temperature <= 2.0):
+         return False, "Temperature must be between 0.001 and 2.0"

      if not (1 <= top_k <= 100):
          return False, "Top-K must be between 1 and 100"
@@ -61,24 +74,20 @@ def validate_inputs(text, temperature, top_k, top_p, max_tokens):
      if not (0.1 <= top_p <= 1.0):
          return False, "Top-P must be between 0.1 and 1.0"

-     if not (256 <= max_tokens <= 4096):
-         return False, "Max tokens must be between 256 and 4096"
-
      return True, ""

  @spaces.GPU()
  def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p, max_tokens):
-     """Generate speech from Bambara text"""
-
-     if not MODEL_LOADED:
-         return None, "❌ Model not loaded. Please check the logs for errors."
+

      if not text.strip():
          return None, "Please enter some Bambara text."

      try:

-         speaker = SPEAKERS[speaker_name]
+         tts, speakers = initialize_model()
+
+         speaker = speakers[speaker_name]

          if use_advanced:
              is_valid, error_msg = validate_inputs(text, temperature, top_k, top_p, max_tokens)
@@ -94,7 +103,6 @@ def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p,
                  max_new_audio_tokens=int(max_tokens)
              )
          else:
-
              waveform = tts.generate_speech(
                  text=text.strip(),
                  speaker_id=speaker
@@ -107,9 +115,12 @@
          return (sample_rate, waveform), f"✅ Audio generated successfully"

      except Exception as e:
+         logger.error(f"Speech generation failed: {e}")
          return None, f"❌ Error: {str(e)}"


+ SPEAKER_NAMES = ["Adame", "Moussa", "Bourama", "Modibo", "Seydou"]
+
  examples = [
      ["Aw ni ce", "Adame"],
      ["Mali bɛna diya kɔsɛbɛ, ka a da a kan baara bɛ ka kɛ.", "Moussa"],
@@ -123,159 +134,157 @@ examples = [

  ]

-
- with gr.Blocks(title="Bambara TTS - EXPERIMENTAL", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("""
-     # 🎀 Bambara Text-to-Speech ⚠️ EXPERIMENTAL
-
-     Convert Bambara text to speech using AI. This model is currently experimental.
+ def build_interface():
+     """Build the Gradio interface for Bambara TTS"""

-     **Bambara** is spoken by millions of people in Mali and West Africa.
-     """)
-
-     with gr.Row():
-         with gr.Column(scale=2):
-             # Input section
-             text_input = gr.Textbox(
-                 label="📝 Bambara Text",
-                 placeholder="Type your Bambara text here...",
-                 lines=3,
-                 max_lines=6,
-                 value="Aw ni ce"
-             )
-
-             # Speaker selection
-             speaker_dropdown = gr.Dropdown(
-                 choices=list(SPEAKERS.keys()),
-                 value="Adame",
-                 label="🗣️ Speaker Voice"
-             )
-
-             # Generation button
-             generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
-
-         with gr.Column(scale=1):
-             # Advanced settings toggle
-             use_advanced = gr.Checkbox(
-                 label="⚙️ Use Advanced Settings",
-                 value=False,
-                 info="Enable to customize generation parameters"
-             )
-
-             # Advanced settings (hidden by default)
-             with gr.Group(visible=False) as advanced_group:
-                 gr.Markdown("**Advanced Parameters:**")
-
-                 temperature = gr.Slider(
-                     minimum=0.1,
-                     maximum=2.0,
-                     value=0.8,
-                     step=0.1,
-                     label="Temperature",
-                     info="Higher = more varied"
+     with gr.Blocks(title="Bambara TTS - EXPERIMENTAL") as demo:
+         gr.Markdown("""
+         # 🎀 Bambara Text-to-Speech ⚠️ EXPERIMENTAL
+
+         **Powered by MALIBA-AI**
+
+         Convert Bambara text to speech. This model is currently experimental.
+
+         **Bambara** is spoken by millions of people in Mali and West Africa.
+         .
+         """)
+
+
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 text_input = gr.Textbox(
+                     label="📝 Bambara Text",
+                     placeholder="Type your Bambara text here...",
+                     lines=3,
+                     max_lines=10,
+                     value="I ni ce"
                  )

-                 top_k = gr.Slider(
-                     minimum=1,
-                     maximum=100,
-                     value=50,
-                     step=5,
-                     label="Top-K"
+                 speaker_dropdown = gr.Dropdown(
+                     choices=SPEAKER_NAMES,
+                     value="Adame",
+                     label="🗣️ Speaker Voice"
                  )

-                 top_p = gr.Slider(
-                     minimum=0.1,
-                     maximum=1.0,
-                     value=0.9,
-                     step=0.05,
-                     label="Top-P"
-                 )
+                 generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")

-                 max_tokens = gr.Slider(
-                     minimum=256,
-                     maximum=4096,
-                     value=2048,
-                     step=256,
-                     label="Max Length"
+             with gr.Column(scale=1):
+                 use_advanced = gr.Checkbox(
+                     label="⚙️ Use Advanced Settings",
+                     value=False,
+                     info="Enable to customize generation parameters"
                  )
-
-     # Output section
-     gr.Markdown("### 🔊 Generated Audio")
-
-     audio_output = gr.Audio(
-         label="Generated Speech",
-         type="numpy",
-         interactive=False
-     )
+
+                 with gr.Group(visible=False) as advanced_group:
+                     gr.Markdown("**Advanced Parameters:**")
+
+                     temperature = gr.Slider(
+                         minimum=0.1,
+                         maximum=2.0,
+                         value=0.8,
+                         step=0.1,
+                         label="Temperature",
+                         info="Higher = more varied"
+                     )
+
+                     top_k = gr.Slider(
+                         minimum=1,
+                         maximum=100,
+                         value=50,
+                         step=5,
+                         label="Top-K"
+                     )
+
+                     top_p = gr.Slider(
+                         minimum=0.1,
+                         maximum=1.0,
+                         value=0.9,
+                         step=0.05,
+                         label="Top-P"
+                     )
+
+                     max_tokens = gr.Slider(
+                         minimum=256,
+                         maximum=4096,
+                         value=2048,
+                         step=256,
+                         label="Max Length"
+                     )

-     status_output = gr.Textbox(
-         label="Status",
-         interactive=False,
-         show_label=False,
-         container=False
-     )
-
-     # Examples section
-     with gr.Accordion("📚 Try These Examples", open=True):
-         def load_example(text, speaker):
-             return text, speaker, False, 0.8, 50, 0.9, 2048
+         gr.Markdown("### 🔊 Generated Audio")

-         gr.Markdown("**Click any example below:**")
-         example_buttons = []
+         audio_output = gr.Audio(
+             label="Generated Speech",
+             type="numpy",
+             interactive=False
+         )
+
+         status_output = gr.Textbox(
+             label="Status",
+             interactive=False,
+             show_label=False,
+             container=False
+         )

-         for i, (text, speaker) in enumerate(examples):
-             btn = gr.Button(f"🎯 {text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
-             btn.click(
-                 fn=lambda t=text, s=speaker: load_example(t, s),
-                 outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
-             )
-
-     # Information section
-     with gr.Accordion("ℹ️ About", open=False):
-         gr.Markdown("""
-         **⚠️ This is an experimental Bambara TTS model.**
+         with gr.Accordion("Try These Examples", open=True):
+             def load_example(text, speaker):
+                 return text, speaker, False, 0.8, 50, 0.9, 2048
+
+             gr.Markdown("**Click any example below:**")
+
+             for i, (text, speaker) in enumerate(examples):
+                 btn = gr.Button(f" {text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
+                 btn.click(
+                     fn=lambda t=text, s=speaker: load_example(t, s),
+                     outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
+                 )

-         **Common Bambara Phrases:**
-         - **Aw ni ce** - Hello (formal)
-         - **I ni ce** - Hello (informal)
-         - **I ka kene wa?** - How are you?
-         - **Aw ni tile** - Good afternoon
+         with gr.Accordion(" About", open=False):
+             gr.Markdown("""
+             **⚠️ This is an experimental Bambara TTS model.**
+             - **Languages**: Bambara (bm)
+             - **Speakers**: 5 different voice options
+             - **Sample Rate**: 16kHz
+             """)

-         **Available Speakers:** Adame, Moussa, Bourama, Modibo, Seydou
+         def toggle_advanced(use_adv):
+             return gr.Group(visible=use_adv)

-         **Tips:**
-         - Start with default settings
-         - Use shorter texts for better results
-         - Try different speakers for variety
-         """)
-
-     # Toggle advanced settings visibility
-     def toggle_advanced(use_adv):
-         return gr.Group(visible=use_adv)
+         use_advanced.change(
+             fn=toggle_advanced,
+             inputs=[use_advanced],
+             outputs=[advanced_group]
+         )
+
+         generate_btn.click(
+             fn=generate_speech,
+             inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
+             outputs=[audio_output, status_output],
+             show_progress=True
+         )
+
+         text_input.submit(
+             fn=generate_speech,
+             inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
+             outputs=[audio_output, status_output],
+             show_progress=True
+         )

-     use_advanced.change(
-         fn=toggle_advanced,
-         inputs=[use_advanced],
-         outputs=[advanced_group]
-     )
+     return demo
+
+ def main():
+     """Main function to launch the Gradio interface"""
+     logger.info("Starting Bambara TTS Gradio interface.")

-     # Wire up the interface
-     generate_btn.click(
-         fn=generate_speech,
-         inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
-         outputs=[audio_output, status_output]
+     interface = build_interface()
+     interface.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False
      )

-     # Auto-generate on Enter key in text input
-     text_input.submit(
-         fn=generate_speech,
-         inputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens],
-         outputs=[audio_output, status_output]
-     )
+     logger.info("Gradio interface launched successfully.")

  if __name__ == "__main__":
-     demo.launch(
-         server_name="0.0.0.0",
-         server_port=7860,
-         share=False
-     )
+     main()