# app.py — Bambara speech recognition demo (Gradio Space, MALIBA-AI).
import os
import spaces
import torch
import torchaudio
import gradio as gr
import logging
from whosper import WhosperTranscriber
# Configure module-level logging once for the whole app.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Select the best available inference device: prefer CUDA, then Apple
# Silicon MPS, and fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
    logger.info("Using CUDA for inference.")
elif torch.backends.mps.is_available():
    device = "mps"
    logger.info("Using MPS for inference.")
else:
    device = "cpu"
    logger.info("Using CPU for inference.")

# Load the ASR model once at import time so every request shares it.
# NOTE(review): `device` is computed above but never passed to
# WhosperTranscriber here — confirm the library selects the device itself.
model_id = "sudoping01/maliba-asr-v1"
transcriber = WhosperTranscriber(model_id=model_id)
logger.info(f"Transcriber initialized with model: {model_id}")
def resample_audio(audio_path, target_sample_rate=16000):
    """
    Load an audio file and resample it to the target sample rate.

    Args:
        audio_path (str): Path to the audio file.
        target_sample_rate (int): Desired sample rate in Hz (default 16000).

    Returns:
        tuple: ``(waveform, target_sample_rate)`` where ``waveform`` is a
        torch.Tensor, resampled only if the source rate differs.

    Raises:
        Exception: Re-raises any error from loading or resampling after
        logging it.
    """
    try:
        waveform, original_sample_rate = torchaudio.load(audio_path)
        if original_sample_rate != target_sample_rate:
            resampler = torchaudio.transforms.Resample(
                orig_freq=original_sample_rate,
                new_freq=target_sample_rate
            )
            waveform = resampler(waveform)
        return waveform, target_sample_rate
    except Exception as e:
        logger.error(f"Error resampling audio: {e}")
        # Bare `raise` preserves the original traceback (unlike `raise e`,
        # which appends this frame to it).
        raise
@spaces.GPU()
def transcribe_audio(audio_file):
    """
    Run Bambara speech-to-text on one audio file via Whosper.

    Args:
        audio_file: Path to the audio file, or None if nothing was provided.

    Returns:
        str: The transcribed Bambara text, or a user-facing message when
        input is missing or transcription fails.
    """
    if audio_file is None:
        return "Please provide an audio file for transcription."
    try:
        logger.info(f"Transcribing audio file: {audio_file}")
        transcription = transcriber.transcribe_audio(audio_file)
        logger.info("Transcription successful.")
    except Exception as e:
        logger.error(f"Transcription failed: {e}")
        return f"Error during transcription: {str(e)}"
    # Whosper returns a dict; fall back to an empty string if "text" is absent.
    return transcription.get("text", "")
def get_example_files(directory="./examples"):
    """
    Collect up to five example audio files from a directory.

    Args:
        directory (str): Directory to scan for audio files.

    Returns:
        list: Absolute paths to at most five audio files, sorted by
        filename so the UI shows a stable set across runs. Empty list when
        the directory is missing or unreadable.
    """
    if not os.path.exists(directory):
        logger.warning(f"Examples directory {directory} not found.")
        return []
    # str.endswith accepts a tuple, so one call covers every extension.
    audio_extensions = ('.wav', '.mp3', '.m4a', '.flac', '.ogg')
    audio_files = []
    try:
        # Sort the listing: os.listdir order is filesystem-dependent, which
        # would otherwise make the displayed examples nondeterministic.
        for file in sorted(os.listdir(directory)):
            if file.lower().endswith(audio_extensions):
                audio_files.append(os.path.abspath(os.path.join(directory, file)))
        logger.info(f"Found {len(audio_files)} example audio files.")
        return audio_files[:5]
    except Exception as e:
        logger.error(f"Error reading examples directory: {e}")
        return []
def build_interface():
    """
    Build the Gradio Blocks interface for Bambara speech recognition.

    Returns:
        gr.Blocks: The assembled, ready-to-launch demo.
    """
    example_files = get_example_files()

    def _transcribe_on_change(audio_file):
        # The .change event also fires when the Clear button resets the
        # audio value to None; returning "" then keeps the cleared output
        # empty instead of overwriting it with the "please provide an
        # audio file" help message.
        if audio_file is None:
            return ""
        return transcribe_audio(audio_file)

    with gr.Blocks(title="Bambara Speech Recognition") as demo:
        gr.Markdown(
            """
            # 🎀 Bambara Automatic Speech Recognition
            **Powered by MALIBA-AI**
            Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
            - πŸŽ™οΈ **Record** your voice directly
            - πŸ“ **Upload** an audio file
            - 🎡 **Try** our example audio files
            ## Supported Audio Formats
            WAV, MP3, M4A, FLAC, OGG
            """
        )
        with gr.Row():
            with gr.Column():
                # Accepts both microphone capture and file upload; the
                # component hands transcribe_audio a filesystem path.
                audio_input = gr.Audio(
                    label="🎀 Record or Upload Audio",
                    type="filepath",
                    sources=["microphone", "upload"]
                )
                transcribe_btn = gr.Button(
                    "πŸ”„ Transcribe Audio",
                    variant="primary",
                    size="lg"
                )
                clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
            with gr.Column():
                output_text = gr.Textbox(
                    label="πŸ“ Transcribed Text (Bambara)",
                    lines=8,
                    placeholder="Your transcribed Bambara text will appear here...",
                    interactive=False
                )
        if example_files:
            gr.Markdown("## 🎡 Try These Examples")
            gr.Examples(
                examples=[[f] for f in example_files],
                inputs=[audio_input],
                outputs=output_text,
                fn=transcribe_audio,
                cache_examples=False,
                label="Example Audio Files"
            )
        gr.Markdown(
            """
            ---
            ## ℹ️ About This Model
            - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
            - **Developer:** MALIBA-AI
            - **Language:** Bambara (bm)
            - **Task:** Automatic Speech Recognition (ASR)
            - **Sample Rate:** 16kHz (automatically resampled)
            ## πŸš€ How to Use
            1. **Record Audio:** Click the microphone button and speak in Bambara
            2. **Upload File:** Click the upload button to select an audio file
            3. **Transcribe:** Click the "Transcribe Audio" button
            4. **View Results:** See your transcribed text in Bambara
            ## πŸ“Š Performance Notes
            - Best results with clear speech and minimal background noise
            - Supports various audio formats and durations
            - Optimized for Bambara language patterns and phonetics
            """
        )
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True
        )
        clear_btn.click(
            fn=lambda: (None, ""),
            outputs=[audio_input, output_text]
        )
        # Auto-transcribe as soon as a recording/upload finishes, via the
        # None-tolerant wrapper so clearing does not repopulate the output.
        audio_input.change(
            fn=_transcribe_on_change,
            inputs=[audio_input],
            outputs=output_text,
            show_progress=True
        )
    return demo
def main():
    """
    Build and launch the Gradio interface.

    Binds to all network interfaces on port 7860 (the standard Hugging
    Face Spaces port) without creating a public share link.
    """
    logger.info("Starting Bambara ASR Gradio interface.")
    interface = build_interface()
    # Log before launch(): it blocks until the server shuts down, so a
    # message placed after it would only appear on exit.
    logger.info("Launching Gradio interface on 0.0.0.0:7860.")
    interface.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860
    )
# Run the app only when executed directly (not when imported).
if __name__ == "__main__":
    main()