Spaces:

MALIBA-AI
/

BambaraSpeechToText

Running on Zero

App Files Community

sudoping01 committed 13 days ago

Commit

c7e5b6a

verified ·

1 Parent(s): c8322f0

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -137

app.py CHANGED Viewed

@@ -24,18 +24,11 @@ else:
 model_id = "sudoping01/maliba-asr-v1"
 transcriber = WhosperTranscriber(model_id=model_id)
-logger.info(f"Transcriber initialized with model: {model_id}")
 def resample_audio(audio_path, target_sample_rate=16000):
     """
     Converts the audio file to the target sampling rate (16000 Hz).
-    Args:
-        audio_path (str): Path to the audio file.
-        target_sample_rate (int): The desired sample rate.
-    Returns:
-        A tensor containing the resampled audio data and the target sample rate.
     """
     try:
         waveform, original_sample_rate = torchaudio.load(audio_path)
@@ -54,25 +47,15 @@ def resample_audio(audio_path, target_sample_rate=16000):
 @spaces.GPU()
 def transcribe_audio(audio_file):
     """
     Transcribes the provided audio file into Bambara text using Whosper.
-    Args:
-        audio_file: The path to the audio file to transcribe.
-    Returns:
-        A string representing the transcribed Bambara text.
     """
     if audio_file is None:
         return "Please provide an audio file for transcription."
     try:
         logger.info(f"Transcribing audio file: {audio_file}")
         result = transcriber.transcribe_audio(audio_file)
         logger.info("Transcription successful.")
         return result
@@ -81,16 +64,14 @@ def transcribe_audio(audio_file):
         return f"Error during transcription: {str(e)}"
 def get_example_files(directory="./examples"):
     """
-    Returns a list of audio files from the examples directory.
     Args:
         directory (str): The directory to search for audio files.
     Returns:
-        list: A list of paths to the audio files.
     """
     if not os.path.exists(directory):
         logger.warning(f"Examples directory {directory} not found.")
         return []
@@ -101,10 +82,14 @@ def get_example_files(directory="./examples"):
     try:
         files = os.listdir(directory)
         for file in files:
             if any(file.lower().endswith(ext) for ext in audio_extensions):
                 full_path = os.path.abspath(os.path.join(directory, file))
-                audio_files.append(full_path)
         logger.info(f"Found {len(audio_files)} example audio files.")
         return audio_files[:5]
@@ -113,127 +98,73 @@ def get_example_files(directory="./examples"):
         logger.error(f"Error reading examples directory: {e}")
         return []
-def build_interface():
     """
-    Builds the Gradio interface for Bambara speech recognition.
     """
-    example_files = get_example_files()
-    with gr.Blocks(title="Bambara Speech Recognition") as demo:
-        gr.Markdown(
-            """
-            # 🎤 Bambara Automatic Speech Recognition
-            **Powered by MALIBA-AI**
-            Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
-            - 🎙️ **Record** your voice directly
-            - 📁 **Upload** an audio file
-            - 🎵 **Try** our example audio files
-            ## Supported Audio Formats
-            WAV, MP3, M4A, FLAC, OGG
-            """
-        )
-        with gr.Row():
-            with gr.Column():
-                audio_input = gr.Audio(
-                    label="🎤 Record or Upload Audio",
-                    type="filepath",
-                    sources=["microphone", "upload"]
-                )
-                transcribe_btn = gr.Button(
-                    "🔄 Transcribe Audio",
-                    variant="primary",
-                    size="lg"
-                )
-                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
-            with gr.Column():
-                output_text = gr.Textbox(
-                    label="📝 Transcribed Text (Bambara)",
-                    lines=8,
-                    placeholder="Your transcribed Bambara text will appear here...",
-                    interactive=False
-                )
-        # Examples section
-        if example_files:
-            gr.Markdown("## 🎵 Try These Examples")
-            gr.Examples(
-                examples=[[f] for f in example_files],
-                inputs=[audio_input],
-                outputs=output_text,
-                fn=transcribe_audio,
-                cache_examples=False,
-                label="Example Audio Files"
-            )
-        # Information section
-        gr.Markdown(
-            """
-            ---
-            ## ℹ️ About This Model
-            - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
-            - **Developer:** MALIBA-AI
-            - **Language:** Bambara (bm)
-            - **Task:** Automatic Speech Recognition (ASR)
-            - **Sample Rate:** 16kHz (automatically resampled)
-            ## 🚀 How to Use
-            1. **Record Audio:** Click the microphone button and speak in Bambara
-            2. **Upload File:** Click the upload button to select an audio file
-            3. **Transcribe:** Click the "Transcribe Audio" button
-            4. **View Results:** See your transcribed text in Bambara
-            ## 📊 Performance Notes
-            - Best results with clear speech and minimal background noise
-            - Supports various audio formats and durations
-            - Optimized for Bambara language patterns and phonetics
-            """
-        )
-        transcribe_btn.click(
-            fn=transcribe_audio,
-            inputs=[audio_input],
-            outputs=output_text,
-            show_progress=True
-        )
-        clear_btn.click(
-            fn=lambda: (None, ""),
-            outputs=[audio_input, output_text]
-        )
-        audio_input.change(
-            fn=transcribe_audio,
-            inputs=[audio_input],
-            outputs=output_text,
-            show_progress=True
-        )
-    return demo
-def main():
-    """
-    Main function to launch the Gradio interface.
-    """
-    logger.info("Starting Bambara ASR Gradio interface.")
-    interface = build_interface()
     interface.launch(
         share=False,
         server_name="0.0.0.0",

 model_id = "sudoping01/maliba-asr-v1"
 transcriber = WhosperTranscriber(model_id=model_id)
+logger.info(f"MALIBA-ASR-v1 transcriber initialized successfully")
 def resample_audio(audio_path, target_sample_rate=16000):
     """
     Converts the audio file to the target sampling rate (16000 Hz).
     """
     try:
         waveform, original_sample_rate = torchaudio.load(audio_path)
 @spaces.GPU()
 def transcribe_audio(audio_file):
     """
     Transcribes the provided audio file into Bambara text using Whosper.
     """
     if audio_file is None:
         return "Please provide an audio file for transcription."
     try:
         logger.info(f"Transcribing audio file: {audio_file}")
         result = transcriber.transcribe_audio(audio_file)
         logger.info("Transcription successful.")
         return result
         return f"Error during transcription: {str(e)}"
 def get_example_files(directory="./examples"):
     """
+    Returns a list of audio files from the examples directory formatted for gr.Interface examples.
     Args:
         directory (str): The directory to search for audio files.
     Returns:
+        list: A list of [audio_path] for each example file.
     """
     if not os.path.exists(directory):
         logger.warning(f"Examples directory {directory} not found.")
         return []
     try:
         files = os.listdir(directory)
+=
+        files.sort()
         for file in files:
             if any(file.lower().endswith(ext) for ext in audio_extensions):
                 full_path = os.path.abspath(os.path.join(directory, file))
+   =
+                audio_files.append([full_path])
         logger.info(f"Found {len(audio_files)} example audio files.")
         return audio_files[:5]
         logger.error(f"Error reading examples directory: {e}")
         return []
+def main():
     """
+    Main function to launch the Gradio interface using gr.Interface.
     """
+    logger.info("Starting MALIBA-ASR-v1 Gradio interface.")
+    example_files = get_example_files()
+    interface = gr.Interface(
+        fn=transcribe_audio,
+        inputs=[
+            gr.Audio(
+                label=" Record or Upload Audio",
+                type="filepath",
+                sources=["microphone", "upload"]
+            )
+        ],
+        outputs=gr.Textbox(
+            label="📝 Transcribed Text (Bambara)",
+            lines=8,
+            placeholder="Your transcribed Bambara text will appear here..."
+        ),
+        title="🎤 MALIBA-ASR-v1: Bambara Speech Recognition",
+        description="""
+                    **Revolutionizing Bambara Speech Technology | Powered by MALIBA-AI**
+                    Experience breakthrough Bambara speech recognition with **MALIBA-ASR-v1** - the most advanced open-source ASR model for Bambara, serving over 22 million speakers across Mali and West Africa.
+                    **Performance**: WER 0.226 | CER 0.109 on (6-hour test set)
+                            """,
+        examples=example_files if example_files else None,
+        cache_examples=False,
+        article="""
+---
+## 🏆 MALIBA-ASR-v1 Performance
+| Metric | Value | Benchmark |
+|--------|-------|-----------|
+| **WER** | **0.226** | oza75/bambara-asr (test set) |
+| **CER** | **0.109** | oza75/bambara-asr (test set) |
+| **Test Duration** | **6 hours** | Diverse speakers & dialects |
+##  Revolutionary Impact
+**MALIBA-ASR-v1** sets a new standard for Bambara speech recognition, significantly outperforming all existing open-source solutions. This breakthrough enables:
+##  🇲🇱 About MALIBA-AI  🇲🇱
+MALIBA-AI is committed to ensuring **"No Malian Language Left Behind"** by:
+- Breaking digital language barriers for 22+ million Bambara speakers
+- Building cutting-edge AI technology for African languages
+- Preserving Mali's rich linguistic and cultural heritage
+- Democratizing access to voice technology across literacy levels
+- Training the next generation of African AI researchers
+---
+**Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1) | **Dataset**: [oza75/bambara-asr](https://huggingface.co/datasets/oza75/bambara-asr)
+*Empowering Mali's Future Through Community-Driven AI Innovation* 🇲🇱
+        """
+    )
     interface.launch(
         share=False,
         server_name="0.0.0.0",