# Bambara Automatic Speech Recognition — Gradio app for Hugging Face Spaces
# (Space hardware: ZeroGPU, "Running on Zero").
import os
import spaces  # Hugging Face Spaces ZeroGPU support; import has Spaces-side effects
import torch
import torchaudio
import gradio as gr
import logging

from whosper import WhosperTranscriber

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Pick the best available inference device: CUDA GPU, then Apple MPS, then CPU.
# NOTE(review): `device` is selected but never passed to WhosperTranscriber —
# confirm the library performs its own device placement.
if torch.cuda.is_available():
    device = "cuda"
    logger.info("Using CUDA for inference.")
elif torch.backends.mps.is_available():
    device = "mps"
    logger.info("Using MPS for inference.")
else:
    device = "cpu"
    logger.info("Using CPU for inference.")

# Bambara ASR model; the transcriber is built once at import time and reused
# by every request.
model_id = "sudoping01/maliba-asr-v1"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info(f"Transcriber initialized with model: {model_id}")
def resample_audio(audio_path, target_sample_rate=16000):
    """
    Load an audio file and resample it to ``target_sample_rate`` if needed.

    Args:
        audio_path (str): Path to the audio file.
        target_sample_rate (int): Desired sample rate in Hz (default 16000).

    Returns:
        tuple: ``(waveform, target_sample_rate)`` where ``waveform`` is the
        (possibly resampled) audio tensor from :func:`torchaudio.load`.

    Raises:
        Exception: Any error from loading or resampling is logged and
        re-raised unchanged.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate,
            )
            waveform = resampler(waveform)
        return waveform, target_sample_rate
    except Exception as e:
        logger.error(f"Error resampling audio: {e}")
        raise  # bare raise preserves the original traceback (unlike `raise e`)
def transcribe_audio(audio_file):
    """
    Transcribe the provided audio file into Bambara text using Whosper.

    Args:
        audio_file: Path to the audio file, or ``None`` (e.g. a cleared
            Gradio input).

    Returns:
        str: The transcribed Bambara text, or a user-facing message when the
        input is missing or transcription fails (errors are reported in the
        UI rather than raised).
    """
    if audio_file is None:
        return "Please provide an audio file for transcription."
    try:
        logger.info(f"Transcribing audio file: {audio_file}")
        result = transcriber.transcribe_audio(audio_file)
        logger.info("Transcription successful.")
        # Whosper returns a dict; fall back to "" if the "text" key is absent.
        return result.get("text", "")
    except Exception as e:
        logger.error(f"Transcription failed: {e}")
        return f"Error during transcription: {str(e)}"
def get_example_files(directory="./examples"):
    """
    Collect up to five example audio files from a directory.

    Args:
        directory (str): Directory to scan for audio files.

    Returns:
        list: Absolute paths (in ``os.listdir`` order) of at most 5 files
        with a supported audio extension; empty list if the directory is
        missing or unreadable.
    """
    if not os.path.exists(directory):
        logger.warning(f"Examples directory {directory} not found.")
        return []
    # str.endswith accepts a tuple of suffixes — one call per file.
    audio_extensions = ('.wav', '.mp3', '.m4a', '.flac', '.ogg')
    try:
        audio_files = [
            os.path.abspath(os.path.join(directory, file))
            for file in os.listdir(directory)
            if file.lower().endswith(audio_extensions)
        ]
        logger.info(f"Found {len(audio_files)} example audio files.")
        return audio_files[:5]  # cap the examples shown in the UI
    except Exception as e:
        logger.error(f"Error reading examples directory: {e}")
        return []
def build_interface():
    """
    Build the Gradio Blocks interface for Bambara speech recognition.

    Returns:
        gr.Blocks: The assembled (unlaunched) demo with recording/upload
        input, transcription output, optional examples, and event wiring.
    """
    example_files = get_example_files()

    with gr.Blocks(title="Bambara Speech Recognition") as demo:
        gr.Markdown(
            """
            # 🎤 Bambara Automatic Speech Recognition
            **Powered by MALIBA-AI**
            Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
            - 🎙️ **Record** your voice directly
            - 📁 **Upload** an audio file
            - 🎵 **Try** our example audio files
            ## Supported Audio Formats
            WAV, MP3, M4A, FLAC, OGG
            """
        )

        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(
                    label="🎤 Record or Upload Audio",
                    type="filepath",
                    sources=["microphone", "upload"],
                )
                transcribe_btn = gr.Button(
                    "🚀 Transcribe Audio",
                    variant="primary",
                    size="lg",
                )
                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
            with gr.Column():
                output_text = gr.Textbox(
                    label="📝 Transcribed Text (Bambara)",
                    lines=8,
                    placeholder="Your transcribed Bambara text will appear here...",
                    interactive=False,
                )

        # Examples section is shown only when ./examples contains audio files.
        if example_files:
            gr.Markdown("## 🎵 Try These Examples")
            gr.Examples(
                examples=[[f] for f in example_files],
                inputs=[audio_input],
                outputs=output_text,
                fn=transcribe_audio,
                cache_examples=False,
                label="Example Audio Files",
            )

        gr.Markdown(
            """
            ---
            ## ℹ️ About This Model
            - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
            - **Developer:** MALIBA-AI
            - **Language:** Bambara (bm)
            - **Task:** Automatic Speech Recognition (ASR)
            - **Sample Rate:** 16kHz (automatically resampled)
            ## 📖 How to Use
            1. **Record Audio:** Click the microphone button and speak in Bambara
            2. **Upload File:** Click the upload button to select an audio file
            3. **Transcribe:** Click the "Transcribe Audio" button
            4. **View Results:** See your transcribed text in Bambara
            ## 📊 Performance Notes
            - Best results with clear speech and minimal background noise
            - Supports various audio formats and durations
            - Optimized for Bambara language patterns and phonetics
            """
        )

        # Event wiring: explicit button click, clear, and auto-transcribe on
        # any change to the audio input (record stop or upload).
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True,
        )
        clear_btn.click(
            fn=lambda: (None, ""),
            outputs=[audio_input, output_text],
        )
        audio_input.change(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True,
        )

    return demo
def main():
    """
    Build and launch the Gradio interface.

    Binds to all interfaces on port 7860 (the standard Hugging Face Spaces
    port). ``launch`` blocks until the server is shut down, so log before
    launching rather than after.
    """
    logger.info("Starting Bambara ASR Gradio interface.")
    interface = build_interface()
    logger.info("Launching Gradio interface.")
    interface.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
    )


if __name__ == "__main__":
    main()