Spaces:

YosefA
/

AmharicNLP

Sleeping

App Files Files Community

AmharicNLP / app.py

YosefAyele

make it possible to record audio from within the app

a17627f 6 months ago

raw

history blame

2.29 kB

	import os
	import tempfile

	import gradio as gr
	import numpy as np
	from pydub import AudioSegment
	from scipy.io.wavfile import write
	from speechbrain.inference.ASR import EncoderASR


	# Instantiate the speech recognizer once at import time from the
	# "YosefA/wave2vec2_amharic_stt" source, using
	# "pretrained_models/asr-wav2vec2-amharic" as its save directory.
	asr_model = EncoderASR.from_hparams(
	    savedir="pretrained_models/asr-wav2vec2-amharic",
	    source="YosefA/wave2vec2_amharic_stt",
	)

	# Working directory for intermediate .wav files; a no-op if it already exists.
	os.makedirs("temp_audio", exist_ok=True)

	def transcribe_audio(audio_file):
	    """
	    Convert an audio file to .wav format, run transcription, and return the result.

	    Args:
	        audio_file: Path of the audio file to transcribe (the Gradio audio
	            input is configured with type="filepath"). May be None or empty
	            when no audio has been uploaded or recorded.

	    Returns:
	        The value returned by asr_model.transcribe_file for the converted
	        file, or an empty string when no audio file was supplied.
	    """
	    # Nothing to transcribe: avoid handing None to the audio decoder.
	    if not audio_file:
	        return ""

	    # Use a unique file per call so overlapping requests cannot overwrite
	    # each other's converted audio.
	    os.makedirs("temp_audio", exist_ok=True)
	    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav", dir="temp_audio")
	    os.close(fd)

	    try:
	        # Convert audio to .wav format
	        sound = AudioSegment.from_file(audio_file)
	        # export() hands back the output file handle; close it explicitly so
	        # the file is flushed and can be removed afterwards.
	        sound.export(temp_audio_path, format="wav").close()

	        # Transcribe the audio
	        return asr_model.transcribe_file(temp_audio_path)
	    finally:
	        # Always clean up, even when conversion or transcription fails.
	        try:
	            os.remove(temp_audio_path)
	        except FileNotFoundError:
	            pass

	def save_audio_to_file(audio_data, file_path="temp_audio/input_audio.wav"):
	    """
	    Write an audio tuple (samples and sample rate) to a .wav file.

	    Args:
	        audio_data: Two-element tuple holding the samples and the sample
	            rate. Both (samples, sample_rate) and (sample_rate, samples)
	            orderings are accepted; the scalar element is treated as the
	            sample rate.
	        file_path: Destination path of the .wav file. Missing parent
	            directories are created.

	    Returns:
	        The file_path that was written.
	    """
	    first, second = audio_data
	    # Tell the two orderings apart by which element is a scalar.
	    if np.ndim(first) == 0:
	        sample_rate, samples = first, second
	    else:
	        samples, sample_rate = first, second
	    samples = np.asarray(samples)

	    if np.issubdtype(samples.dtype, np.floating):
	        # Float samples are scaled to int16; clip first so values outside
	        # [-1, 1] saturate instead of wrapping around.
	        samples = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
	    # Integer samples are written unchanged: scaling them by 32767 would
	    # overflow and corrupt the audio.

	    # dirname is "" for a bare filename, and os.makedirs("") raises.
	    directory = os.path.dirname(file_path)
	    if directory:
	        os.makedirs(directory, exist_ok=True)

	    write(file_path, int(sample_rate), samples)
	    return file_path

	def process_audio(audio_data):
	    """
	    Save recorded/uploaded in-memory audio to a .wav file and transcribe it.

	    Args:
	        audio_data: Audio tuple in the form accepted by save_audio_to_file,
	            or None when no audio has been provided.

	    Returns:
	        The result of transcribe_audio for the saved file, or an empty
	        string when audio_data is None.
	    """
	    # No audio supplied: there is nothing to unpack or transcribe.
	    if audio_data is None:
	        return ""

	    # Use a unique file per call so overlapping requests do not share a path.
	    os.makedirs("temp_audio", exist_ok=True)
	    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav", dir="temp_audio")
	    os.close(fd)

	    try:
	        save_audio_to_file(audio_data, temp_audio_path)
	        return transcribe_audio(temp_audio_path)
	    finally:
	        # Remove the intermediate file; it may already be gone if the
	        # transcription step cleaned it up.
	        try:
	            os.remove(temp_audio_path)
	        except FileNotFoundError:
	            pass


	# Build the user interface: a title, a short instruction line, an audio
	# input next to a text output, and a button that runs the transcription.
	with gr.Blocks() as app:
	    gr.Markdown("### Amharic Speech-to-Text Transcription App")
	    gr.Markdown("Upload or record an audio file in any format, and get its transcription.")

	    with gr.Row():
	        # type="filepath" means the callback receives a path to the audio file.
	        audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
	        transcription_output = gr.Textbox(label="Transcription")

	    transcribe_button = gr.Button("Transcribe")
	    # Clicking the button feeds the audio path to transcribe_audio and shows
	    # its return value in the textbox.
	    transcribe_button.click(
	        fn=transcribe_audio,
	        inputs=audio_input,
	        outputs=transcription_output,
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()