Spaces: MALIBA-AI/BambaraSpeechToText

Running on Zero

App Files Community

sudoping01 committed 13 days ago

Commit

6f340af

verified ·

1 Parent(s): 13fccc4

Update app.py

Browse files

Files changed (1) hide show

app.py +152 -210

app.py CHANGED Viewed

@@ -1,246 +1,188 @@
-import os
-import spaces
-import torch
-import torchaudio
 import gradio as gr
-import logging
-from whosper import WhosperTranscriber
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-if torch.cuda.is_available():
-    device = "cuda"
-    logger.info("Using CUDA for inference.")
-elif torch.backends.mps.is_available():
-    device = "mps"
-    logger.info("Using MPS for inference.")
-else:
-    device = "cpu"
-    logger.info("Using CPU for inference.")
-model_id = "sudoping01/maliba-asr-v1"
-transcriber = WhosperTranscriber(model_id=model_id)
-logger.info(f"Transcriber initialized with model: {model_id}")
-def resample_audio(audio_path, target_sample_rate=16000):
-    """
-    Converts the audio file to the target sampling rate (16000 Hz).
-    Args:
-        audio_path (str): Path to the audio file.
-        target_sample_rate (int): The desired sample rate.
-    Returns:
-        A tensor containing the resampled audio data and the target sample rate.
-    """
-    try:
-        waveform, original_sample_rate = torchaudio.load(audio_path)
-        if original_sample_rate != target_sample_rate:
-            resampler = torchaudio.transforms.Resample(
-                orig_freq=original_sample_rate,
-                new_freq=target_sample_rate
-            )
-            waveform = resampler(waveform)
-        return waveform, target_sample_rate
-    except Exception as e:
-        logger.error(f"Error resampling audio: {e}")
-        raise e
-@spaces.GPU()
-def transcribe_audio(audio_file):
-    """
-    Transcribes the provided audio file into Bambara text using Whosper.
-    Args:
-        audio_file: The path to the audio file to transcribe.
-    Returns:
-        A string representing the transcribed Bambara text.
-    """
-    if audio_file is None:
-        return "Please provide an audio file for transcription."
-    try:
-        logger.info(f"Transcribing audio file: {audio_file}")
-        result = transcriber.transcribe_audio(audio_file)
-        logger.info("Transcription successful.")
-        return result
-    except Exception as e:
-        logger.error(f"Transcription failed: {e}")
-        return f"Error during transcription: {str(e)}"
-def get_example_files(directory="./examples"):
-    """
-    Returns a list of audio files from the examples directory.
-    Args:
-        directory (str): The directory to search for audio files.
-    Returns:
-        list: A list of paths to the audio files.
-    """
-    if not os.path.exists(directory):
-        logger.warning(f"Examples directory {directory} not found.")
-        return []
-    audio_extensions = ['.wav', '.mp3', '.m4a', '.flac', '.ogg']
-    audio_files = []
-    try:
-        files = os.listdir(directory)
-        for file in files:
-            if any(file.lower().endswith(ext) for ext in audio_extensions):
-                full_path = os.path.abspath(os.path.join(directory, file))
-                audio_files.append(full_path)
-        logger.info(f"Found {len(audio_files)} example audio files.")
-        return audio_files[:5]
-    except Exception as e:
-        logger.error(f"Error reading examples directory: {e}")
-        return []
 def build_interface():
     """
-    Builds the Gradio interface for Bambara speech recognition.
     """
     example_files = get_example_files()
-    with gr.Blocks(title="Bambara Speech Recognition") as demo:
         gr.Markdown(
             """
-            # 🎤 Bambara Automatic Speech Recognition
-            **Powered by MALIBA-AI**
-            Convert Bambara speech to text using our state-of-the-art ASR model. You can either:
-            - 🎙️ **Record** your voice directly
-            - 📁 **Upload** an audio file
-            - 🎵 **Try** our example audio files
-            ## Supported Audio Formats
-            WAV, MP3, M4A, FLAC, OGG
             """
         )
-        with gr.Row():
-            with gr.Column():
                 audio_input = gr.Audio(
-                    label="🎤 Record or Upload Audio",
                     type="filepath",
-                    sources=["microphone", "upload"]
                 )
-                transcribe_btn = gr.Button(
-                    "🔄 Transcribe Audio",
-                    variant="primary",
-                    size="lg"
                 )
-                clear_btn = gr.Button("🗑️ Clear", variant="secondary")
-            with gr.Column():
                 output_text = gr.Textbox(
-                    label="📝 Transcribed Text (Bambara)",
-                    lines=8,
                     placeholder="Your transcribed Bambara text will appear here...",
-                    interactive=False
                 )
-        # Examples section
         if example_files:
-            gr.Markdown("## 🎵 Try These Examples")
-            gr.Examples(
-                examples=[[f] for f in example_files],
-                inputs=[audio_input],
-                outputs=output_text,
-                fn=transcribe_audio,
-                cache_examples=False,
-                label="Example Audio Files"
-            )
-        # Information section
         gr.Markdown(
             """
-            ---
-            ## ℹ️ About This Model
-            - **Model:** [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
-            - **Developer:** MALIBA-AI
-            - **Language:** Bambara (bm)
-            - **Task:** Automatic Speech Recognition (ASR)
-            - **Sample Rate:** 16kHz (automatically resampled)
-            ## 🚀 How to Use
-            1. **Record Audio:** Click the microphone button and speak in Bambara
-            2. **Upload File:** Click the upload button to select an audio file
-            3. **Transcribe:** Click the "Transcribe Audio" button
-            4. **View Results:** See your transcribed text in Bambara
-            ## 📊 Performance Notes
-            - Best results with clear speech and minimal background noise
-            - Supports various audio formats and durations
-            - Optimized for Bambara language patterns and phonetics
             """
         )
-        transcribe_btn.click(
-            fn=transcribe_audio,
-            inputs=[audio_input],
-            outputs=output_text,
-            show_progress=True
-        )
-        clear_btn.click(
-            fn=lambda: (None, ""),
-            outputs=[audio_input, output_text]
-        )
         audio_input.change(
-            fn=transcribe_audio,
-            inputs=[audio_input],
-            outputs=output_text,
             show_progress=True
         )
-    return demo
-def main():
-    """
-    Main function to launch the Gradio interface.
-    """
-    logger.info("Starting Bambara ASR Gradio interface.")
-    interface = build_interface()
-    interface.launch(
-        share=False,
-        server_name="0.0.0.0",
-        server_port=7860
-    )
-    logger.info("Gradio interface launched successfully.")
-if __name__ == "__main__":
-    main()

 import gradio as gr
+import os
 def build_interface():
     """
+    Builds an enhanced Gradio interface for Bambara speech recognition.
     """
     example_files = get_example_files()
+    custom_css = """
+    .gr-button-primary {
+        background-color: #2c5282 !important;
+        color: white !important;
+        border-radius: 8px !important;
+        font-weight: bold !important;
+    }
+    .gr-button-secondary {
+        background-color: #e2e8f0 !important;
+        color: #2d3748 !important;
+        border-radius: 8px !important;
+    }
+    .example-container {
+        background-color: #f7fafc;
+        padding: 16px;
+        border-radius: 8px;
+        margin-top: 16px;
+    }
+    .gr-textbox {
+        border-radius: 8px !important;
+        border: 1px solid #cbd5e0 !important;
+    }
+    .gr-audio {
+        border-radius: 8px !important;
+    }
+    .header {
+        text-align: center;
+        color: #2d3748;
+    }
+    .info-section {
+        background-color: #edf2f7;
+        padding: 16px;
+        border-radius: 8px;
+        margin-top: 16px;
+    }
+    """
+    with gr.Blocks(title="Bambara Speech Recognition", css=custom_css) as demo:
+        # Header
         gr.Markdown(
             """
+            <h1 class="header">🎤 Bambara Speech Recognition</h1>
+            <p style="text-align: center; color: #4a5568;">
+                Powered by <b>MALIBA-AI</b> | Convert Bambara speech to text effortlessly
+            </p>
             """
         )
+        # Main interaction section
+        with gr.Row():
+            with gr.Column(scale=1):
+                gr.Markdown("### 🎙️ Record or Upload Audio")
                 audio_input = gr.Audio(
+                    label="Record or Upload Audio",
                     type="filepath",
+                    sources=["microphone", "upload"],
+                    show_label=False
                 )
+                audio_preview = gr.Audio(
+                    label="Preview Your Audio",
+                    interactive=False,
+                    visible=False
                 )
+                with gr.Row():
+                    transcribe_btn = gr.Button(
+                        "🔄 Transcribe Audio",
+                        variant="primary",
+                        size="lg"
+                    )
+                    clear_btn = gr.Button(
+                        "🗑️ Clear",
+                        variant="secondary",
+                        size="lg"
+                    )
+            with gr.Column(scale=1):
+                gr.Markdown("### 📝 Transcription Output")
                 output_text = gr.Textbox(
+                    label="Transcribed Text (Bambara)",
+                    lines=6,
                     placeholder="Your transcribed Bambara text will appear here...",
+                    interactive=False,
+                    show_copy_button=True
                 )
+                status_message = gr.Markdown(
+                    value="",
+                    visible=False
+                )
+        # Example audio section
         if example_files:
+            gr.Markdown("## 🎵 Try Example Audio Files")
+            with gr.Group(elem_classes="example-container"):
+                gr.Markdown(
+                    """
+                    Listen to these sample Bambara audio files and transcribe them with one click.
+                    """
+                )
+                for idx, file in enumerate(example_files):
+                    with gr.Row():
+                        gr.Audio(
+                            value=file,
+                            label=f"Example {idx + 1}: {os.path.basename(file)}",
+                            interactive=False,
+                            show_label=True
+                        )
+                        gr.Button(
+                            f"Transcribe Example {idx + 1}",
+                            variant="primary",
+                            size="sm"
+                        ).click(
+                            fn=transcribe_audio,
+                            inputs=gr.State(value=file),
+                            outputs=[output_text, status_message],
+                            show_progress=True,
+                            _js="() => {return {show_progress: true}}"
+                        )
         gr.Markdown(
             """
+            <div class="info-section">
+            ## ℹ️ How to Use
+            1. **Record**: Click the microphone to speak in Bambara.
+            2. **Upload**: Select an audio file (WAV, MP3, M4A, FLAC, OGG).
+            3. **Transcribe**: Click "Transcribe Audio" or try an example.
+            4. **View**: See the transcribed text in Bambara.
+            ## 📊 Model Details
+            - **Model**: [sudoping01/maliba-asr-v1](https://huggingface.co/sudoping01/maliba-asr-v1)
+            - **Language**: Bambara (bm)
+            - **Sample Rate**: 16kHz (auto-resampled)
+            - **Best for**: Clear speech with minimal background noise
+            </div>
             """
         )
+        def update_audio_preview(audio_file):
+            return gr.update(value=audio_file, visible=True), ""
+        def clear_inputs():
+            return None, "", gr.update(visible=False), ""
+        def transcribe_with_status(audio_file):
+            if not audio_file:
+                return "", "**Error**: Please provide an audio file."
+            result = transcribe_audio(audio_file)
+            if "Error" in result:
+                return result, f"**Error**: {result}"
+            return result, "**Success**: Transcription completed!"
         audio_input.change(
+            fn=update_audio_preview,
+            inputs=audio_input,
+            outputs=[audio_preview, status_message]
+        ).then(
+            fn=transcribe_with_status,
+            inputs=audio_input,
+            outputs=[output_text, status_message],
             show_progress=True
         )
+        transcribe_btn.click(
+            fn=transcribe_with_status,
+            inputs=audio_input,
+            outputs=[output_text, status_message],
+            show_progress=True
+        )
+        clear_btn.click(
+            fn=clear_inputs,
+            outputs=[audio_input, output_text, audio_preview, status_message]
+        )
+    return demo