|
|
|
|
|
import gradio as gr |
|
from transformers import pipeline |
|
|
|
|
|
# Swahili fine-tuned Whisper ASR pipeline. The model is fetched from the
# Hugging Face Hub on first run, so startup needs network access and may
# take a while; loaded once at import time and reused for every request.
pipe = pipeline("automatic-speech-recognition", model="Futuresony/whisper-small-sw")
|
|
|
|
|
def transcribe(audio):
    """Run speech recognition on an audio clip and return the transcript.

    Args:
        audio: Filesystem path to the recording (Gradio ``type="filepath"``),
            or ``None`` when the user supplied no audio.

    Returns:
        The recognized text, or a prompt asking for an audio file when
        ``audio`` is ``None``.
    """
    if audio is None:
        return "Please upload or record an audio file."
    print("Transcribing audio...")
    return pipe(audio)["text"]
|
|
|
|
|
def _transcribe_first_available(mic_audio, file_audio):
    """Transcribe whichever audio source the user actually provided.

    Prefers the microphone recording and falls back to the uploaded file.
    The original code registered ``.click`` on the same button twice (once
    per input widget), so BOTH callbacks ran on every click and the later
    result — usually the "please upload" prompt from the empty widget —
    raced with and could overwrite the real transcription. A single
    callback reading both inputs makes the output deterministic.
    """
    return transcribe(mic_audio if mic_audio is not None else file_audio)


with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Swahili Speech-to-Text Transcription App")

    with gr.Row():
        # NOTE(review): `source=` is the Gradio 3.x parameter name; Gradio 4+
        # renamed it to `sources=[...]` — confirm the installed version.
        audio_input = gr.Audio(source="microphone", type="filepath", label="🎤 Record Audio")
        file_input = gr.Audio(source="upload", type="filepath", label="📁 Upload Audio File")

    transcribe_button = gr.Button("Transcribe")
    output_text = gr.Textbox(label="📝 Transcription Output")

    # One callback, both inputs — see _transcribe_first_available above.
    transcribe_button.click(
        _transcribe_first_available,
        inputs=[audio_input, file_input],
        outputs=output_text,
    )


demo.launch()
|
|