# Spaces: Running on Zero
import os | |
import spaces | |
import torch | |
import torchaudio | |
import gradio as gr | |
import logging | |
from whosper import WhosperTranscriber | |
# Configure logging at import time; all messages in this module go through `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick a torch backend name: CUDA first, then Apple MPS, otherwise CPU.
# NOTE(review): in the code visible here `device` is only logged and is not
# passed to WhosperTranscriber below -- confirm whether the transcriber picks
# its own device or should receive this value.
if torch.cuda.is_available():
    device = "cuda"
    logger.info("Using CUDA for inference.")
elif torch.backends.mps.is_available():
    device = "mps"
    logger.info("Using MPS for inference.")
else:
    device = "cpu"
    logger.info("Using CPU for inference.")

# The transcriber is created once at module level and shared by
# transcribe_audio(), which reads the module-level `transcriber` name.
model_id = "sudoping01/maliba-asr-v1"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info("MALIBA-ASR-v1 transcriber initialized successfully")
def resample_audio(audio_path, target_sample_rate=16000):
    """Load an audio file and resample it to ``target_sample_rate``.

    Args:
        audio_path: Path handed to ``torchaudio.load``.
        target_sample_rate: Desired sampling rate in Hz (default 16000).

    Returns:
        tuple: ``(waveform, target_sample_rate)``. ``waveform`` is the value
        returned by ``torchaudio.load``, passed through
        ``torchaudio.transforms.Resample`` only when the file's own rate
        differs from the target.

    Raises:
        Exception: Any error raised while loading or resampling is logged
        (with traceback) and re-raised unchanged.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate,
            )
            waveform = resampler(waveform)
    except Exception as exc:
        logger.exception("Error resampling audio: %s", exc)
        # Bare `raise` keeps the original traceback intact.
        raise
    return waveform, target_sample_rate
def transcribe_audio(audio_file):
    """Transcribe an audio file into Bambara text using the module-level transcriber.

    Args:
        audio_file: Path to the audio file, or ``None``/empty when the user
            supplied nothing.

    Returns:
        The value returned by ``transcriber.transcribe_audio`` on success.
        When no file is given, or when transcription raises, a human-readable
        message string is returned instead of raising.
    """
    # Treat a missing value and an empty path the same way: nothing to transcribe.
    if not audio_file:
        return "Please provide an audio file for transcription."

    try:
        logger.info("Transcribing audio file: %s", audio_file)
        result = transcriber.transcribe_audio(audio_file)
    except Exception as exc:
        # Deliberate catch-all: the error text is returned to the caller as the
        # function result; the traceback goes to the log.
        logger.exception("Transcription failed: %s", exc)
        return f"Error during transcription: {exc}"

    logger.info("Transcription successful.")
    return result
def get_example_files(directory="./examples", max_examples=5):
    """Collect example audio files formatted for ``gr.Interface(examples=...)``.

    Args:
        directory (str): Directory to scan (non-recursively) for audio files.
        max_examples (int | None): Maximum number of examples to return.
            ``None`` returns every match. Defaults to 5.

    Returns:
        list: ``[absolute_path]`` entries, one per audio file, sorted by file
        name and truncated to ``max_examples``. An empty list is returned when
        the directory is missing or cannot be read.
    """
    if not os.path.isdir(directory):
        logger.warning(f"Examples directory {directory} not found.")
        return []

    # str.endswith accepts a tuple, so one call checks every extension.
    audio_extensions = (".wav", ".mp3", ".m4a", ".flac", ".ogg")

    try:
        names = sorted(os.listdir(directory))
    except OSError as exc:
        logger.error(f"Error reading examples directory: {exc}")
        return []

    audio_files = []
    for name in names:
        if not name.lower().endswith(audio_extensions):
            continue
        full_path = os.path.abspath(os.path.join(directory, name))
        # Skip sub-directories that merely look like audio files by name.
        if os.path.isfile(full_path):
            audio_files.append([full_path])

    selected = audio_files[:max_examples]
    # Report both numbers so the log matches what is actually returned.
    logger.info(
        "Found %d example audio files; returning %d.",
        len(audio_files),
        len(selected),
    )
    return selected
# Markdown shown under the interface title. Kept at column 0 so that no
# leading indentation ends up inside the rendered markdown.
_DESCRIPTION = """
**Revolutionizing Bambara Speech Technology | Powered by MALIBA-AI**

Experience breakthrough Bambara speech recognition with **MALIBA-ASR-v1** - the most advanced open-source ASR model for Bambara, serving over 22 million speakers across Mali and West Africa.

**Performance**: WER 0.226 | CER 0.109 on (6-hour test set)
"""

# Markdown passed as the interface's `article` argument.
_ARTICLE = """
---

## MALIBA-ASR-v1 Performance

| Metric | Value | Benchmark |
|--------|-------|-----------|
| **WER** | **0.226** | oza75/bambara-asr (test set) |
| **CER** | **0.109** | oza75/bambara-asr (test set) |
| **Test Duration** | **6 hours** | Diverse speakers & dialects |

## Revolutionary Impact

**MALIBA-ASR-v1** sets a new standard for Bambara speech recognition, significantly outperforming all existing open-source solutions. This breakthrough enables:

## 🇲🇱 About MALIBA-AI 🇲🇱

MALIBA-AI is committed to ensuring **"No Malian Language Left Behind"** by:

- Breaking digital language barriers for 22+ million Bambara speakers
- Building cutting-edge AI technology for African languages
- Preserving Mali's rich linguistic and cultural heritage
- Democratizing access to voice technology across literacy levels
- Training the next generation of African AI researchers

---

**Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1) | **Dataset**: [oza75/bambara-asr](https://huggingface.co/datasets/oza75/bambara-asr)

*Empowering Mali's Future Through Community-Driven AI Innovation* 🇲🇱
"""


def main():
    """Build the Gradio interface around ``transcribe_audio`` and launch it.

    The interface takes one audio input (microphone or upload, delivered to
    the callback as a file path) and shows the result in a text box. Example
    files come from ``get_example_files()``. The server is launched on
    ``0.0.0.0:7860`` without a public share link.
    """
    logger.info("Starting MALIBA-ASR-v1 Gradio interface.")

    example_files = get_example_files()

    audio_input = gr.Audio(
        label=" Record or Upload Audio",
        type="filepath",
        sources=["microphone", "upload"],
    )
    text_output = gr.Textbox(
        label="Transcribed Text (Bambara)",
        lines=8,
        placeholder="Your transcribed Bambara text will appear here...",
    )

    interface = gr.Interface(
        fn=transcribe_audio,
        inputs=[audio_input],
        outputs=text_output,
        title="🎤 MALIBA-ASR-v1: Bambara Speech Recognition",
        description=_DESCRIPTION,
        # Pass None rather than an empty list when no example files were found.
        examples=example_files or None,
        cache_examples=False,
        article=_ARTICLE,
    )

    interface.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )
    # NOTE(review): launch() appears to block when run as a script, so this
    # message may only be emitted once the server stops -- confirm and consider
    # rewording or logging before launch().
    logger.info("Gradio interface launched successfully.")
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()