# AmharicNLP / app.py — Hugging Face Space by YosefA (commit 5076d03)
# Standard library
import os
import tempfile
import time

# Third-party
import gradio as gr
import numpy as np
from pydub import AudioSegment
from scipy.io.wavfile import write
from speechbrain.inference.ASR import EncoderASR
# Load the ASR model
asr_model = EncoderASR.from_hparams(
source="YosefA/wave2vec2_amharic_stt",
savedir="pretrained_models/asr-wav2vec2-amharic"
)
# Directory to store converted audio files
os.makedirs("temp_audio", exist_ok=True)
def transcribe_audio(audio_file):
    """Convert an audio file to WAV and return the model's transcription.

    Args:
        audio_file: Path to an audio file in any format pydub/ffmpeg can
            decode.

    Returns:
        str: Transcription text produced by the ASR model.
    """
    # Use a unique temp filename so concurrent Gradio requests cannot
    # clobber each other (a fixed path like "input_audio.wav" is race-prone).
    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav", dir="temp_audio")
    os.close(fd)  # pydub reopens the path itself; close the raw descriptor
    try:
        # Normalize whatever format was uploaded to WAV for the ASR model.
        sound = AudioSegment.from_file(audio_file)
        sound.export(temp_audio_path, format="wav")
        transcription = asr_model.transcribe_file(temp_audio_path)
    finally:
        # Clean up even when decoding or transcription raises, so failed
        # requests don't leak files into temp_audio/.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
    return transcription
def process_audio(audio_data):
    """Hand the recorded/uploaded audio straight to the transcriber.

    Args:
        audio_data: Filepath delivered by the Gradio audio widget.

    Returns:
        str: The transcription text.
    """
    return transcribe_audio(audio_data)
# Define the Gradio interface
with gr.Blocks() as app:
gr.Markdown("### Amharic Speech-to-Text Transcription App")
gr.Markdown("Upload or record an audio file in any format, and get its transcription.")
with gr.Row():
audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
transcription_output = gr.Textbox(label="Transcription")
transcribe_button = gr.Button("Transcribe")
transcribe_button.click(process_audio, inputs=audio_input, outputs=transcription_output)
# Just comment
# Launch the app
if __name__ == "__main__":
app.launch()