Spaces:

DigitalUmuganda
/

Kinyarwanda-asr

Runtime error

App Files Files Community

Kinyarwanda-asr / app_upload_model_input.py

rutsam

push the code

3119dd6 about 2 years ago

raw

history blame

2.02 kB

	import gradio as gr
	import librosa
	import soundfile as sf
	import torch
	import warnings
	import os
	from transformers import Wav2Vec2ProcessorWithLM, Wav2Vec2CTCTokenizer, Wav2Vec2Model


	# Install a blanket "ignore" warning filter before the SpeechBrain import
	# and model load that follow.
	warnings.filterwarnings(action="ignore")

	from speechbrain.pretrained import EncoderDecoderASR

	# Module-level ASR model shared by every transcription request.
	# NOTE(review): `savedir` is presumably the local download/cache directory
	# for the pretrained files - confirm against the SpeechBrain docs.
	asr_model = EncoderDecoderASR.from_hparams(
	    source="speechbrain/asr-wav2vec2-commonvoice-rw",
	    savedir="pretrained_models/asr-wav2vec2-commonvoice-rw",
	)
	# Example usage:
	#   asr_model.transcribe_file("speechbrain/asr-wav2vec2-commonvoice-rw/example.mp3")



	# define speech-to-text function
	def asr_transcript(audio, audio_microphone, model_params):
	    """Transcribe an uploaded or microphone-recorded audio clip to text.

	    When both inputs are supplied, the microphone recording takes
	    precedence over the uploaded file.

	    Args:
	        audio: Uploaded audio file object exposing a ``name`` attribute
	            (the path handed to the model), or ``None`` if nothing was
	            uploaded.
	        audio_microphone: Microphone recording of the same form, or
	            ``None`` if nothing was recorded.
	        model_params: Value of the model-selection dropdown. Accepted so
	            the signature matches the interface inputs; currently unused.

	    Returns:
	        str: The result of ``asr_model.transcribe_file`` for the chosen
	        clip, or a single human-readable message when no usable audio was
	        provided.
	    """
	    if audio is None and audio_microphone is None:
	        # Return ONE string, not a tuple: the Gradio interface in this file
	        # declares a single output textbox, and every other branch of this
	        # function returns a single value as well.
	        return (
	            "Please provide audio by uploading a file or by recording audio "
	            "using microphone by pressing Record (And allow usage of microphone)"
	        )

	    # Prefer the microphone recording; fall back to the uploaded file.
	    selected = audio_microphone if audio_microphone else audio

	    if not selected:
	        # Something other than None was supplied, but it is empty/falsy.
	        return "File not valid"

	    return asr_model.transcribe_file(selected.name)

	# Input widgets, listed in the positional order of asr_transcript's
	# parameters: uploaded file, microphone recording, model selector.
	# NOTE(review): `gr.inputs` / `gr.outputs` are legacy Gradio namespaces -
	# confirm they exist in the Gradio version this app is deployed with.
	_upload_input = gr.inputs.Audio(label="Upload Audio File", type="file", optional=True)
	_microphone_input = gr.inputs.Audio(
	    source="microphone",
	    type="file",
	    optional=True,
	    label="Record from microphone",
	)
	_model_selector = gr.inputs.Dropdown(
	    choices=["deepspeech", "coqui (soon)"],
	    type="value",
	    default="deepspeech",
	    label="Select speech recognition model ",
	    optional=False,
	)
	_transcript_output = gr.outputs.Textbox(label="Recognized speech")

	# One row per example; each row supplies a value for every input widget.
	_example_rows = [
	    ["sample_1.wav", "sample_1.wav", "deepspeech"],
	    ["sample_2.wav", "sample_2.wav", "deepspeech"],
	]

	# NOTE(review): the article text and dropdown mention "deepspeech", while the
	# model loaded earlier in this file comes from SpeechBrain - confirm wording.
	gradio_ui = gr.Interface(
	    fn=asr_transcript,
	    title="Kinyarwanda Speech Recognition",
	    description="Upload an audio clip or record from browser using microphone, and let AI do the hard work of transcribing.",
	    article="""
	This demo showcases the pretrained model from deepspeech.
	""",
	    inputs=[_upload_input, _microphone_input, _model_selector],
	    outputs=[_transcript_output],
	    examples=_example_rows,
	)

	# Start the web app with the `enable_queue` option switched on.
	gradio_ui.launch(enable_queue=True)