# Spaces: MALIBA-AI / BambaraSpeechToText
# Running on Zero
# File size: 5,637 Bytes

import os
import spaces
import torch
import torchaudio
import gradio as gr
import logging
from whosper import WhosperTranscriber


# Emit INFO-level records so the startup and per-request messages are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Startup message logged for each torch backend this script can select.
_DEVICE_MESSAGES = {
    "cuda": "Using CUDA for inference.",
    "mps": "Using MPS for inference.",
    "cpu": "Using CPU for inference.",
}

# Backend preference: CUDA first, then MPS, then CPU. The MPS probe only runs
# when CUDA is unavailable.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
logger.info(_DEVICE_MESSAGES[device])


# NOTE(review): `device` is not passed to the transcriber anywhere in the
# visible code; presumably WhosperTranscriber picks its own device - confirm.
model_id = "sudoping01/maliba-asr-v1"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info("MALIBA-ASR-v1 transcriber initialized successfully")

def resample_audio(audio_path, target_sample_rate=16000):
    """
    Load an audio file and resample it to ``target_sample_rate`` when needed.

    Args:
        audio_path: Source of the audio, passed straight to ``torchaudio.load``.
        target_sample_rate (int): Desired sampling rate in Hz. Defaults to 16000.
    Returns:
        tuple: ``(waveform, target_sample_rate)``. ``waveform`` is what
        ``torchaudio.load`` returned, resampled only when the file's own rate
        differs from the target.
    Raises:
        Exception: Any error raised while loading or resampling is logged and
        then propagated unchanged.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)

        # Skip building a resampler when the file is already at the target rate.
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate,
            )
            waveform = resampler(waveform)
    except Exception as e:
        logger.error(f"Error resampling audio: {e}")
        # Bare `raise` re-raises the active exception as-is, keeping its
        # original traceback for the caller.
        raise

    return waveform, target_sample_rate

@spaces.GPU()
def transcribe_audio(audio_file):
    """
    Transcribe an audio file into Bambara text using the module-level Whosper transcriber.

    Args:
        audio_file: Path of the audio to transcribe, or None when no audio
            was provided.
    Returns:
        The value returned by ``transcriber.transcribe_audio`` on success.
        Otherwise a human-readable message: a prompt to provide audio when
        ``audio_file`` is None, or the error text when transcription fails.
    """
    if audio_file is None:
        return "Please provide an audio file for transcription."

    try:
        logger.info(f"Transcribing audio file: {audio_file}")
        result = transcriber.transcribe_audio(audio_file)
    except Exception as e:
        # The failure is returned to the caller as text instead of propagating,
        # so the traceback must be recorded here or it is lost entirely.
        logger.exception(f"Transcription failed: {e}")
        return f"Error during transcription: {e}"

    logger.info("Transcription successful.")
    return result

def get_example_files(directory="./examples", max_examples=5):
    """
    Collect example audio files from a directory, formatted for gr.Interface examples.

    Entries are visited in sorted filename order. Only regular files whose name
    ends with a known audio extension (case-insensitive) are kept.

    Args:
        directory (str): The directory to search for audio files.
        max_examples (int): Maximum number of examples to return. Defaults to 5.
    Returns:
        list: A list of [absolute_audio_path] entries, at most ``max_examples``
        long. Empty when the directory is missing or cannot be read.
    """
    if not os.path.isdir(directory):
        logger.warning(f"Examples directory {directory} not found.")
        return []

    # str.endswith accepts a tuple, so a single call checks every extension.
    audio_extensions = ('.wav', '.mp3', '.m4a', '.flac', '.ogg')

    try:
        candidates = sorted(os.listdir(directory))
    except OSError as e:
        logger.error(f"Error reading examples directory: {e}")
        return []

    audio_files = []
    for file_name in candidates:
        if not file_name.lower().endswith(audio_extensions):
            continue
        full_path = os.path.abspath(os.path.join(directory, file_name))
        # Skip sub-directories that merely look like audio files by name.
        if os.path.isfile(full_path):
            audio_files.append([full_path])

    # The count logged is everything found, before the max_examples cut.
    logger.info(f"Found {len(audio_files)} example audio files.")
    return audio_files[:max_examples]

def main():
    """
    Build the Gradio interface for MALIBA-ASR-v1 and launch it.

    Wires ``transcribe_audio`` to one audio input and one text output, attaches
    the example clips returned by ``get_example_files()`` (if any), and launches
    the app on 0.0.0.0:7860 with ``share=False``.
    """
    logger.info("Starting MALIBA-ASR-v1 Gradio interface.")

    example_files = get_example_files()

    # Input: microphone recording or uploaded file, handed to the callback as a path.
    audio_input = gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label=" Record or Upload Audio",
    )

    # Output: multi-line box for the transcription.
    text_output = gr.Textbox(
        placeholder="Your transcribed Bambara text will appear here...",
        lines=8,
        label="📝 Transcribed Text (Bambara)",
    )

    # Markdown-formatted text passed as the interface `description`.
    description_md = """
                    **Revolutionizing Bambara Speech Technology | Powered by MALIBA-AI**
                    
                    Experience breakthrough Bambara speech recognition with **MALIBA-ASR-v1** - the most advanced open-source ASR model for Bambara, serving over 22 million speakers across Mali and West Africa.
                    
                    
                    **Performance**: WER 0.226 | CER 0.109 on (6-hour test set)
                            """

    # Markdown-formatted text passed as the interface `article`.
    article_md = """
---

## 🏆 MALIBA-ASR-v1 Performance

| Metric | Value | Benchmark |
|--------|-------|-----------|
| **WER** | **0.226** | oza75/bambara-asr (test set) |
| **CER** | **0.109** | oza75/bambara-asr (test set) |
| **Test Duration** | **6 hours** | Diverse speakers & dialects |

##  Revolutionary Impact

**MALIBA-ASR-v1** sets a new standard for Bambara speech recognition, significantly outperforming all existing open-source solutions. This breakthrough enables:

##  🇲🇱 About MALIBA-AI  🇲🇱

MALIBA-AI is committed to ensuring **"No Malian Language Left Behind"** by:
- Breaking digital language barriers for 22+ million Bambara speakers
- Building cutting-edge AI technology for African languages
- Preserving Mali's rich linguistic and cultural heritage
- Democratizing access to voice technology across literacy levels
- Training the next generation of African AI researchers

---

**Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1) | **Dataset**: [oza75/bambara-asr](https://huggingface.co/datasets/oza75/bambara-asr)

*Empowering Mali's Future Through Community-Driven AI Innovation* 🇲🇱
        """

    interface = gr.Interface(
        fn=transcribe_audio,
        inputs=[audio_input],
        outputs=text_output,
        title="🎤 MALIBA-ASR-v1: Bambara Speech Recognition",
        description=description_md,
        # An empty example list is passed as None rather than [].
        examples=example_files or None,
        cache_examples=False,
        article=article_md,
    )

    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )

    logger.info("Gradio interface launched successfully.")

if __name__ == "__main__":
    main()