# AmharicNLP / app.py — Hugging Face Space by YosefA (commit 5076d03)
# Standard library
import os
import tempfile
import time

# Third-party
import gradio as gr
import numpy as np
from pydub import AudioSegment
from scipy.io.wavfile import write
from speechbrain.inference.ASR import EncoderASR
# Load the ASR model
asr_model = EncoderASR.from_hparams(
source="YosefA/wave2vec2_amharic_stt",
savedir="pretrained_models/asr-wav2vec2-amharic"
)
# Directory to store converted audio files
os.makedirs("temp_audio", exist_ok=True)
def transcribe_audio(audio_file):
    """Convert an audio file to WAV and return the model's transcription.

    Args:
        audio_file: Path to an audio file in any format pydub/ffmpeg can
            decode.

    Returns:
        str: Transcription text produced by the ASR model.
    """
    # Use a unique temp filename so concurrent Gradio requests cannot
    # clobber each other (a fixed path like "input_audio.wav" is race-prone).
    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav", dir="temp_audio")
    os.close(fd)  # pydub reopens the path itself; close the raw descriptor
    try:
        # Normalize whatever format was uploaded to WAV for the ASR model.
        sound = AudioSegment.from_file(audio_file)
        sound.export(temp_audio_path, format="wav")
        transcription = asr_model.transcribe_file(temp_audio_path)
    finally:
        # Clean up even when decoding or transcription raises, so failed
        # requests don't leak files into temp_audio/.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
    return transcription
def process_audio(audio_data):
    """Hand the recorded/uploaded audio straight to the transcriber.

    Args:
        audio_data: Filepath delivered by the Gradio audio widget.

    Returns:
        str: The transcription text.
    """
    return transcribe_audio(audio_data)
# Define the Gradio interface
with gr.Blocks() as app:
gr.Markdown("### Amharic Speech-to-Text Transcription App")
gr.Markdown("Upload or record an audio file in any format, and get its transcription.")
with gr.Row():
audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
transcription_output = gr.Textbox(label="Transcription")
transcribe_button = gr.Button("Transcribe")
transcribe_button.click(process_audio, inputs=audio_input, outputs=transcription_output)
# Just comment
# Launch the app
if __name__ == "__main__":
app.launch()