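"""Gradio Space app for MALIBA-ASR-v1: Bambara speech recognition using the Whosper transcriber."""
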
import os
import spaces
import torch
import torchaudio
import gradio as gr
import logging
from whosper import WhosperTranscriber

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the best available device for inference.
if torch.cuda.is_available():
    device = "cuda"
    logger.info("Using CUDA for inference.")
elif torch.backends.mps.is_available():
    device = "mps"
    logger.info("Using MPS for inference.")
else:
    device = "cpu"
    logger.info("Using CPU for inference.")

model_id = "sudoping01/maliba-asr-v1"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info("MALIBA-ASR-v1 transcriber initialized successfully.")


def resample_audio(audio_path, target_sample_rate=16000):
    """
    Converts the audio file to the target sampling rate (16000 Hz).

    Note: this helper is kept for preprocessing but is not called in the
    Gradio pipeline below; the transcriber receives the file path directly.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate
            )
            waveform = resampler(waveform)
        return waveform, target_sample_rate
    except Exception as e:
        logger.error(f"Error resampling audio: {e}")
        raise


@spaces.GPU()
def transcribe_audio(audio_file):
    """
    Transcribes the provided audio file into Bambara text using Whosper.
    """
    if audio_file is None:
        return "Please provide an audio file for transcription."
    try:
        logger.info(f"Transcribing audio file: {audio_file}")
        result = transcriber.transcribe_audio(audio_file)
        logger.info("Transcription successful.")
        return result
    except Exception as e:
        logger.error(f"Transcription failed: {e}")
        return f"Error during transcription: {str(e)}"


def get_example_files(directory="./examples"):
    """
    Returns a list of audio files from the examples directory formatted for gr.Interface examples.

    Args:
        directory (str): The directory to search for audio files.

    Returns:
        list: A list of [audio_path] for each example file.
    """
    if not os.path.exists(directory):
        logger.warning(f"Examples directory {directory} not found.")
        return []
    audio_extensions = ['.wav', '.mp3', '.m4a', '.flac', '.ogg']
    audio_files = []
    try:
        files = os.listdir(directory)
        # Sort for a stable, reproducible ordering of examples.
        files.sort()
        for file in files:
            if any(file.lower().endswith(ext) for ext in audio_extensions):
                full_path = os.path.abspath(os.path.join(directory, file))
                # gr.Interface expects each example row as a list of input values.
                audio_files.append([full_path])
        logger.info(f"Found {len(audio_files)} example audio files.")
        # Show at most five examples in the UI.
        return audio_files[:5]
    except Exception as e:
        logger.error(f"Error reading examples directory: {e}")
        return []


def main():
    """
    Main function to launch the Gradio interface using gr.Interface.
    """
    logger.info("Starting MALIBA-ASR-v1 Gradio interface.")
    example_files = get_example_files()

    interface = gr.Interface(
        fn=transcribe_audio,
        inputs=[
            gr.Audio(
                label="Record or Upload Audio",
                type="filepath",
                sources=["microphone", "upload"]
            )
        ],
        outputs=gr.Textbox(
            label="πŸ“ Transcribed Text (Bambara)",
            lines=8,
            placeholder="Your transcribed Bambara text will appear here..."
        ),
        title="🎀 MALIBA-ASR-v1: Bambara Speech Recognition",
        description="""
**Revolutionizing Bambara Speech Technology | Powered by MALIBA-AI**

Experience breakthrough Bambara speech recognition with **MALIBA-ASR-v1**, the most advanced open-source ASR model for Bambara, serving over 22 million speakers across Mali and West Africa.

**Performance**: WER 0.226 | CER 0.109 on a 6-hour test set
""",
        examples=example_files if example_files else None,
        cache_examples=False,
        article="""
---
## πŸ† MALIBA-ASR-v1 Performance

| Metric | Value | Benchmark |
|--------|-------|-----------|
| **WER** | **0.226** | oza75/bambara-asr (test set) |
| **CER** | **0.109** | oza75/bambara-asr (test set) |
| **Test Duration** | **6 hours** | Diverse speakers & dialects |

## Revolutionary Impact

**MALIBA-ASR-v1** sets a new standard for Bambara speech recognition, significantly outperforming all existing open-source solutions.

## πŸ‡²πŸ‡± About MALIBA-AI πŸ‡²πŸ‡±

MALIBA-AI is committed to ensuring **"No Malian Language Left Behind"** by:

- Breaking digital language barriers for 22+ million Bambara speakers
- Building cutting-edge AI technology for African languages
- Preserving Mali's rich linguistic and cultural heritage
- Democratizing access to voice technology across literacy levels
- Training the next generation of African AI researchers

---
**Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1) | **Dataset**: [oza75/bambara-asr](https://huggingface.co/datasets/oza75/bambara-asr)

*Empowering Mali's Future Through Community-Driven AI Innovation* πŸ‡²πŸ‡±
"""
    )

    logger.info("Launching Gradio interface.")
    interface.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860
    )


if __name__ == "__main__":
    main()