Spaces:

MALIBA-AI
/

BambaraSpeechToText

Running on Zero

App Files Files Community

sudoping01 committed 13 days ago

Commit

6f63a5e

1 Parent(s): d63d0eb

space init commit

Browse files

Files changed (5) hide show

app.py +246 -0
examples/test1.wav +3 -0
examples/test_00.mp3 +3 -0
examples/test_01.wav +3 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,246 @@

+import os
+import spaces
+import torch
+import torchaudio
+import gradio as gr
+import logging
+from whosper import WhosperTranscriber
+# Configure root logging at INFO level and create this module's logger.
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Pick a device label by availability: CUDA first, then Apple MPS, else CPU.
+# NOTE(review): `device` is only logged in this file; it is not passed to
+# WhosperTranscriber below -- confirm whether the transcriber selects its own device.
+if torch.cuda.is_available():
+    device = "cuda"
+    logger.info("Using CUDA for inference.")
+elif torch.backends.mps.is_available():
+    device = "mps"
+    logger.info("Using MPS for inference.")
+else:
+    device = "cpu"
+    logger.info("Using CPU for inference.")
+# The transcriber is created once at import time and shared by transcribe_audio().
+model_id = "sudoping01/maliba-asr-v1"
+transcriber = WhosperTranscriber(model_id=model_id)
+logger.info(f"Transcriber initialized with model: {model_id}")
+def resample_audio(audio_path, target_sample_rate=16000):
+    """
+    Converts the audio file to the target sampling rate (16000 Hz).
+    Args:
+        audio_path (str): Path to the audio file.
+        target_sample_rate (int): The desired sample rate.
+    Returns:
+        A tensor containing the resampled audio data and the target sample rate.
+    """
+    try:
+        waveform, original_sample_rate = torchaudio.load(audio_path)
+        if original_sample_rate != target_sample_rate:
+            resampler = torchaudio.transforms.Resample(
+                orig_freq=original_sample_rate,
+                new_freq=target_sample_rate
+            )
+            waveform = resampler(waveform)
+        return waveform, target_sample_rate
+    except Exception as e:
+        logger.error(f"Error resampling audio: {e}")
+        raise e
+@spaces.GPU()
+def transcribe_audio(audio_file):
+    """
+    Transcribes the provided audio file into Bambara text using Whosper.
+    Args:
+        audio_file: The path to the audio file to transcribe.
+    Returns:
+        A string representing the transcribed Bambara text.
+    """
+    if audio_file is None:
+        return "Please provide an audio file for transcription."
+    try:
+        logger.info(f"Transcribing audio file: {audio_file}")
+        result = transcriber.transcribe_audio(audio_file)
+        logger.info("Transcription successful.")
+        return result
+    except Exception as e:
+        logger.error(f"Transcription failed: {e}")
+        return f"Error during transcription: {str(e)}"
+def get_example_files(directory="./examples"):
+    """
+    Returns a list of audio files from the examples directory.
+    Args:
+        directory (str): The directory to search for audio files.
+    Returns:
+        list: A list of paths to the audio files.
+    """
+    if not os.path.exists(directory):
+        logger.warning(f"Examples directory {directory} not found.")
+        return []
+    audio_extensions = ['.wav', '.mp3', '.m4a', '.flac', '.ogg']
+    audio_files = []
+    try:
+        files = os.listdir(directory)
+        for file in files:
+            if any(file.lower().endswith(ext) for ext in audio_extensions):
+                full_path = os.path.abspath(os.path.join(directory, file))
+                audio_files.append(full_path)
+        logger.info(f"Found {len(audio_files)} example audio files.")
+        return audio_files[:5]
+    except Exception as e:
+        logger.error(f"Error reading examples directory: {e}")
+        return []
+def build_interface():
+    """
+    Builds the Gradio interface for Bambara speech recognition.
+    """
+    example_files = get_example_files()
+    with gr.Blocks(title="Bambara Speech Recognition") as demo:
+        gr.Markdown(
+            """
+            # 🎤 Bambara Automatic Speech Recognition
+            **Powered by MALIBA-AI**
+            Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
+            - 🎙️ **Record** your voice directly
+            - 📁 **Upload** an audio file
+            - 🎵 **Try** our example audio files
+            ## Supported Audio Formats
+            WAV, MP3, M4A, FLAC, OGG
+            """
+        )
+        with gr.Row():
+            with gr.Column():
+                audio_input = gr.Audio(
+                    label="🎤 Record or Upload Audio",
+                    type="filepath",
+                    sources=["microphone", "upload"]
+                )
+                transcribe_btn = gr.Button(
+                    "🔄 Transcribe Audio",
+                    variant="primary",
+                    size="lg"
+                )
+                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
+            with gr.Column():
+                output_text = gr.Textbox(
+                    label="📝 Transcribed Text (Bambara)",
+                    lines=8,
+                    placeholder="Your transcribed Bambara text will appear here...",
+                    interactive=False
+                )
+        # Examples section
+        if example_files:
+            gr.Markdown("## 🎵 Try These Examples")
+            gr.Examples(
+                examples=[[f] for f in example_files],
+                inputs=[audio_input],
+                outputs=output_text,
+                fn=transcribe_audio,
+                cache_examples=False,
+                label="Example Audio Files"
+            )
+        # Information section
+        gr.Markdown(
+            """
+            ---
+            ## ℹ️ About This Model
+            - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
+            - **Developer:** MALIBA-AI
+            - **Language:** Bambara (bm)
+            - **Task:** Automatic Speech Recognition (ASR)
+            - **Sample Rate:** 16kHz (automatically resampled)
+            ## 🚀 How to Use
+            1. **Record Audio:** Click the microphone button and speak in Bambara
+            2. **Upload File:** Click the upload button to select an audio file
+            3. **Transcribe:** Click the "Transcribe Audio" button
+            4. **View Results:** See your transcribed text in Bambara
+            ## 📊 Performance Notes
+            - Best results with clear speech and minimal background noise
+            - Supports various audio formats and durations
+            - Optimized for Bambara language patterns and phonetics
+            """
+        )
+        transcribe_btn.click(
+            fn=transcribe_audio,
+            inputs=[audio_input],
+            outputs=output_text,
+            show_progress=True
+        )
+        clear_btn.click(
+            fn=lambda: (None, ""),
+            outputs=[audio_input, output_text]
+        )
+        audio_input.change(
+            fn=transcribe_audio,
+            inputs=[audio_input],
+            outputs=output_text,
+            show_progress=True
+        )
+    return demo
+def main():
+    """
+    Main function to launch the Gradio interface.
+    """
+    logger.info("Starting Bambara ASR Gradio interface.")
+    interface = build_interface()
+    interface.launch(
+        share=False,
+        server_name="0.0.0.0",
+        server_port=7860
+    )
+    logger.info("Gradio interface launched successfully.")
+if __name__ == "__main__":
+    main()

examples/test1.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ce23883eac43409832416424b84458eadc02a1efb964b36db35e3c54f3cdccee
+size 1295454

examples/test_00.mp3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd7e68bddcc83a5a315fa712c58f6dc07095142ba926d33a04f1bb13f4552d60
+size 154737

examples/test_01.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db1fe475d58ed4c1738df485310e04a12d4176a7ef65224d1ffeaa93e70c7958
+size 1289004

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+gradio>=4.0.0
+torch>=2.0.0
+torchaudio>=2.0.0
+transformers>=4.30.0
+spaces>=0.10.0
+git+https://github.com/sudoping01/whosper.git