"""Gradio front end for the MALIBA-ASR-v1 Bambara speech-recognition model."""

import logging
import os

import gradio as gr
# NOTE(review): `spaces` is kept ahead of `torch`, matching the original
# import order -- confirm whether the hosting environment depends on it.
import spaces
import torch
import torchaudio

from whosper import WhosperTranscriber

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the best available backend. The value is only logged here; it is not
# passed to the transcriber below.
if torch.cuda.is_available():
    device = "cuda"
    logger.info("Using CUDA for inference.")
elif torch.backends.mps.is_available():
    device = "mps"
    logger.info("Using MPS for inference.")
else:
    device = "cpu"
    logger.info("Using CPU for inference.")

model_id = "sudoping01/maliba-asr-v1"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info("MALIBA-ASR-v1 transcriber initialized successfully")

# File suffixes treated as audio when scanning the examples directory.
# A tuple so it can be handed straight to ``str.endswith``.
_AUDIO_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac", ".ogg")

# Markdown shown under the interface title.
_DESCRIPTION = """
**Revolutionizing Bambara Speech Technology | Powered by MALIBA-AI**

Experience breakthrough Bambara speech recognition with **MALIBA-ASR-v1** - the most advanced open-source ASR model for Bambara, serving over 22 million speakers across Mali and West Africa.

**Performance**: WER 0.226 | CER 0.109 on (6-hour test set)
"""

# Markdown shown below the interface.
_ARTICLE = """
---

## 🏆 MALIBA-ASR-v1 Performance

| Metric | Value | Benchmark |
|--------|-------|-----------|
| **WER** | **0.226** | oza75/bambara-asr (test set) |
| **CER** | **0.109** | oza75/bambara-asr (test set) |
| **Test Duration** | **6 hours** | Diverse speakers & dialects |

## Revolutionary Impact

**MALIBA-ASR-v1** sets a new standard for Bambara speech recognition, significantly outperforming all existing open-source solutions.
This breakthrough enables:

## 🇲🇱 About MALIBA-AI 🇲🇱

MALIBA-AI is committed to ensuring **"No Malian Language Left Behind"** by:

- Breaking digital language barriers for 22+ million Bambara speakers
- Building cutting-edge AI technology for African languages
- Preserving Mali's rich linguistic and cultural heritage
- Democratizing access to voice technology across literacy levels
- Training the next generation of African AI researchers

---

**Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1) | **Dataset**: [oza75/bambara-asr](https://huggingface.co/datasets/oza75/bambara-asr)

*Empowering Mali's Future Through Community-Driven AI Innovation* 🇲🇱
"""


def resample_audio(audio_path, target_sample_rate=16000):
    """Load an audio file and resample it to ``target_sample_rate``.

    Args:
        audio_path: Path of the audio file, passed to ``torchaudio.load``.
        target_sample_rate: Desired sampling rate in Hz (default 16000).

    Returns:
        tuple: ``(waveform, target_sample_rate)``. The waveform is returned
        as loaded when the file is already at the target rate.

    Raises:
        Exception: Any error from loading or resampling is logged and
        re-raised unchanged.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate,
            )
            waveform = resampler(waveform)
        return waveform, target_sample_rate
    except Exception as e:
        logger.error("Error resampling audio: %s", e)
        # Bare ``raise`` keeps the original traceback intact.
        raise


@spaces.GPU()
def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level Whosper transcriber.

    Args:
        audio_file: Path to the audio file, or ``None`` when the user has
            not supplied one.

    Returns:
        The transcriber's result on success; otherwise a human-readable
        message (prompt for missing input, or an error description).
        Failures are reported as text rather than raised so they show up in
        the output textbox.
    """
    if audio_file is None:
        return "Please provide an audio file for transcription."

    try:
        logger.info("Transcribing audio file: %s", audio_file)
        result = transcriber.transcribe_audio(audio_file)
        logger.info("Transcription successful.")
        return result
    except Exception as e:
        logger.error("Transcription failed: %s", e)
        return f"Error during transcription: {e}"


def get_example_files(directory="./examples"):
    """Collect example audio files formatted for ``gr.Interface(examples=...)``.

    Args:
        directory (str): The directory to search for audio files.

    Returns:
        list: Up to five ``[absolute_path]`` entries, in sorted filename
        order. Empty when the directory is missing or cannot be read.
    """
    if not os.path.exists(directory):
        logger.warning("Examples directory %s not found.", directory)
        return []

    try:
        names = sorted(os.listdir(directory))
    except OSError as e:
        logger.error("Error reading examples directory: %s", e)
        return []

    audio_files = [
        [os.path.abspath(os.path.join(directory, name))]
        for name in names
        if name.lower().endswith(_AUDIO_EXTENSIONS)
    ]
    # The count logged is the total found; only the first five are returned.
    logger.info("Found %d example audio files.", len(audio_files))
    return audio_files[:5]


def main():
    """Build the Gradio interface and serve it on 0.0.0.0:7860."""
    logger.info("Starting MALIBA-ASR-v1 Gradio interface.")

    example_files = get_example_files()

    interface = gr.Interface(
        fn=transcribe_audio,
        inputs=[
            gr.Audio(
                label=" Record or Upload Audio",
                type="filepath",
                sources=["microphone", "upload"],
            )
        ],
        outputs=gr.Textbox(
            label="📝 Transcribed Text (Bambara)",
            lines=8,
            placeholder="Your transcribed Bambara text will appear here...",
        ),
        title="🎤 MALIBA-ASR-v1: Bambara Speech Recognition",
        description=_DESCRIPTION,
        # An empty example list is passed as None.
        examples=example_files if example_files else None,
        cache_examples=False,
        article=_ARTICLE,
    )

    interface.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )
    # NOTE(review): launch() presumably blocks when run as a script, in which
    # case this line is only reached after the server stops -- confirm and
    # consider moving or rewording the message.
    logger.info("Gradio interface launched successfully.")


if __name__ == "__main__":
    main()