Spaces:

MALIBA-AI
/

BambaraText2Speech

Running on Zero

App Files Files Community

sudoping01 committed on 12 days ago

Commit

3f89022

verified ·

1 Parent(s): 13036ad

Create app.py

Browse files

Files changed (1) hide show

app.py +234 -0

app.py ADDED Viewed

	@@ -0,0 +1,234 @@

+import gradio as gr
+import numpy as np
+import os
+import spaces
+from huggingface_hub import login
+from maliba_ai.tts.inference import BambaraTTSInference
+from maliba_ai.config.speakers import Adame, Moussa, Bourama, Modibo, Seydou
+# Authenticate with the Hugging Face Hub only when an HF_TOKEN environment
+# variable is set; without it the login step is skipped entirely.
+hf_token = os.getenv("HF_TOKEN")
+if hf_token:
+    login(token=hf_token)
+# The TTS engine is instantiated once at import time and reused by every
+# request through the module-level `tts` name.
+print("Loading Bambara TTS model...")
+tts = BambaraTTSInference()
+print("Model loaded successfully!")
+# Display name -> speaker object imported from maliba_ai.config.speakers.
+# The keys populate the speaker dropdown and are looked up in generate_speech.
+SPEAKERS = {
+    "Adame": Adame,
+    "Moussa": Moussa,
+    "Bourama": Bourama,
+    "Modibo": Modibo,
+    "Seydou": Seydou
+}
+def validate_inputs(text, temperature, top_k, top_p, max_tokens):
+    """Check the advanced generation settings before they reach the model.
+    Args:
+        text: Text to synthesise; must contain a non-whitespace character.
+        temperature: Sampling temperature, accepted in [0.001, 1].
+        top_k: Top-K cutoff, accepted in [1, 100].
+        top_p: Top-P threshold, accepted in [0.1, 1.0].
+        max_tokens: Upper bound on generated audio tokens; must be at least 1.
+    Returns:
+        An ``(is_valid, message)`` tuple. ``message`` is empty when every
+        check passes and otherwise describes the first check that failed.
+    """
+    if not text or not text.strip():
+        return False, "Please enter some Bambara text."
+    # NOTE(review): the Temperature slider in the UI goes up to 2.0, so any
+    # value above 1 chosen there is rejected here -- confirm the intended range.
+    if not (0.001 <= temperature <= 1):
+        return False, "Temperature must be between 0.001 and 1"
+    if not (1 <= top_k <= 100):
+        return False, "Top-K must be between 1 and 100"
+    if not (0.1 <= top_p <= 1.0):
+        return False, "Top-P must be between 0.1 and 1.0"
+    # The caller converts max_tokens with int(); a zero or negative token
+    # budget cannot produce audio, so reject it up front.
+    if not (max_tokens >= 1):
+        return False, "Max Length must be at least 1"
+    return True, ""
+@spaces.GPU()
+def generate_speech(text, speaker_name, use_advanced, temperature, top_k, top_p, max_tokens):
+    """Synthesise speech for the given text and return it with a status line.
+    Args:
+        text: Text typed by the user; surrounding whitespace is ignored.
+        speaker_name: Key of the voice to use in ``SPEAKERS``.
+        use_advanced: When true, the sampling settings below are validated
+            and forwarded; otherwise the model is called with text and
+            speaker only.
+        temperature, top_k, top_p, max_tokens: Sampling settings, used only
+            when ``use_advanced`` is true.
+    Returns:
+        ``((sample_rate, waveform), status)`` on success, or
+        ``(None, status)`` when the input is rejected or generation fails.
+    """
+    # Normalise once; a missing value (None) is treated like empty text
+    # instead of raising AttributeError on .strip().
+    cleaned_text = (text or "").strip()
+    if not cleaned_text:
+        return None, "Please enter some Bambara text."
+    try:
+        speaker = SPEAKERS.get(speaker_name)
+        if speaker is None:
+            # Report the bad name explicitly rather than a bare KeyError repr.
+            return None, f"❌ Unknown speaker: {speaker_name}"
+        if use_advanced:
+            is_valid, error_msg = validate_inputs(cleaned_text, temperature, top_k, top_p, max_tokens)
+            if not is_valid:
+                return None, f"❌ {error_msg}"
+            waveform = tts.generate_speech(
+                text=cleaned_text,
+                speaker_id=speaker,
+                temperature=temperature,
+                top_k=int(top_k),
+                top_p=top_p,
+                max_new_audio_tokens=int(max_tokens)
+            )
+        else:
+            waveform = tts.generate_speech(
+                text=cleaned_text,
+                speaker_id=speaker
+            )
+        if waveform.size == 0:
+            return None, "Failed to generate audio. Please try again."
+        # NOTE(review): assumes the model emits 16 kHz audio -- confirm
+        # against BambaraTTSInference.
+        sample_rate = 16000
+        return (sample_rate, waveform), "✅ Audio generated successfully"
+    except Exception as e:
+        # UI boundary: show the failure in the status box instead of letting
+        # the request crash.
+        return None, f"❌ Error: {e}"
+# Sample inputs shown as one-click buttons in the UI. Each entry is
+# [text, speaker name]; the speaker name is written into the speaker
+# dropdown, whose choices are the keys of SPEAKERS.
+examples = [
+    ["Aw ni ce", "Adame"],
+    ["I ni ce", "Moussa"],
+    ["Aw ni tile", "Bourama"],
+    ["I ka kene wa?", "Modibo"],
+    ["Ala ka Mali suma", "Adame"],
+    ["sigikafɔ kɔnɔ jamanaw ni ɲɔgɔn cɛ, olu ye a haminankow ye, wa o ko ninnu ka kan ka kɛ sariya ani tilennenya kɔnɔ", "Seydou"],
+    ["Aw ni ce. Ne tɔgɔ ye Kaya Magan. Aw Sanbe Sanbe.", "Moussa"],
+    ["An dɔlakelen bɛ masike bilenman don ka tɔw gɛn.", "Bourama"],
+    ["Aw ni ce. Seidu bɛ aw fo wa aw ka yafa a ma, ka da a kan tuma dɔw la kow ka can.", "Modibo"],
+]
+# Create Gradio interface: text/speaker inputs on the left, optional sampling
+# controls on the right, then the audio player, status line and examples.
+with gr.Blocks(title="Bambara TTS - EXPERIMENTAL", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🎤 Bambara Text-to-Speech ⚠️ EXPERIMENTAL
+    Convert Bambara text to speech using AI. This model is currently experimental.
+    **Bambara** is spoken by millions of people in Mali and West Africa.
+    """)
+    with gr.Row():
+        with gr.Column(scale=2):
+            # Input section
+            text_input = gr.Textbox(
+                label="📝 Bambara Text",
+                placeholder="Type your Bambara text here...",
+                lines=3,
+                max_lines=6,
+                value="Aw ni ce"
+            )
+            speaker_dropdown = gr.Dropdown(
+                choices=list(SPEAKERS.keys()),
+                value="Adame",
+                label="🗣️ Speaker Voice"
+            )
+            generate_btn = gr.Button("🎵 Generate Speech", variant="primary", size="lg")
+        with gr.Column(scale=1):
+            use_advanced = gr.Checkbox(
+                label="⚙️ Use Advanced Settings",
+                value=False,
+                info="Enable to customize generation parameters"
+            )
+            # Hidden until the checkbox above is ticked (see toggle_advanced).
+            with gr.Group(visible=False) as advanced_group:
+                gr.Markdown("**Advanced Parameters:**")
+                # NOTE(review): validate_inputs rejects temperatures above 1,
+                # so the upper half of this slider fails validation --
+                # confirm which range is intended.
+                temperature = gr.Slider(
+                    minimum=0.1,
+                    maximum=2.0,
+                    value=0.8,
+                    step=0.1,
+                    label="Temperature",
+                    info="Higher = more varied"
+                )
+                top_k = gr.Slider(
+                    minimum=1,
+                    maximum=100,
+                    value=50,
+                    step=5,
+                    label="Top-K"
+                )
+                top_p = gr.Slider(
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.9,
+                    step=0.05,
+                    label="Top-P"
+                )
+                max_tokens = gr.Slider(
+                    minimum=256,
+                    maximum=4096,
+                    value=2048,
+                    step=256,
+                    label="Max Length"
+                )
+    gr.Markdown("### 🔊 Generated Audio")
+    audio_output = gr.Audio(
+        label="Generated Speech",
+        type="numpy",
+        interactive=False
+    )
+    status_output = gr.Textbox(
+        label="Status",
+        interactive=False,
+        show_label=False,
+        container=False
+    )
+    with gr.Accordion("📚 Try These Examples", open=True):
+        def load_example(text, speaker):
+            # Fill in the example and reset every advanced control to its
+            # default (advanced mode off, sliders at their initial values).
+            return text, speaker, False, 0.8, 50, 0.9, 2048
+        gr.Markdown("**Click any example below:**")
+        for text, speaker in examples:
+            # Button captions are truncated to 30 characters.
+            btn = gr.Button(f"🎯 {text[:30]}{'...' if len(text) > 30 else ''}", size="sm")
+            # Default arguments bind this iteration's example; a plain
+            # closure would see only the last pair once the loop finishes.
+            btn.click(
+                fn=lambda t=text, s=speaker: load_example(t, s),
+                outputs=[text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
+            )
+    # Information section
+    with gr.Accordion("ℹ️ About", open=False):
+        gr.Markdown("""
+        **⚠️ This is an experimental Bambara TTS model.**
+        """)
+    def toggle_advanced(use_adv):
+        # Show the advanced sliders only while the checkbox is ticked.
+        return gr.Group(visible=use_adv)
+    use_advanced.change(
+        fn=toggle_advanced,
+        inputs=[use_advanced],
+        outputs=[advanced_group]
+    )
+    # The button and pressing Enter in the textbox trigger the same
+    # generation call, so both share one input/output wiring.
+    generation_inputs = [text_input, speaker_dropdown, use_advanced, temperature, top_k, top_p, max_tokens]
+    generation_outputs = [audio_output, status_output]
+    generate_btn.click(
+        fn=generate_speech,
+        inputs=generation_inputs,
+        outputs=generation_outputs
+    )
+    text_input.submit(
+        fn=generate_speech,
+        inputs=generation_inputs,
+        outputs=generation_outputs
+    )
+if __name__ == "__main__":
+    # Serve on every network interface at port 7860; share=False is passed
+    # so no share link is requested.
+    launch_options = dict(server_name="0.0.0.0", server_port=7860, share=False)
+    demo.launch(**launch_options)