import gradio as gr
from transformers import Wav2Vec2Processor
from transformers import AutoModelForCTC
from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
from conversationalnlp.models.wav2vec2 import ModelLoader
from conversationalnlp.utils import *
import soundfile as sf
import os
"""
run gradio with
>>python app.py
"""
audioheaderpath = os.path.join(os.getcwd(), "temp")
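
# Load the fine-tuned wav2vec 2.0 CTC model and its processor from the Hugging Face Hub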
pretrained_model = "codenamewei/speech-to-text"
processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)
modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)
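
# Example clips shown in the UI; recorded/uploaded audio is cached to .wav files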
audiofileexamples = ["example1.flac", "example2.flac"]
fileextension = ".wav"


def greet(*args):
    """
    Transcribe audio from the microphone and/or an uploaded file.

    args[0] (mic):  tuple of (sample rate: int, audio: np.array), or None
    args[1] (file): tuple of (sample rate: int, audio: np.array), or None
    """
    dictinput = dict(mic=args[0], file=args[1])
    audiofiles = []

    for key, audioarray in dictinput.items():
        if audioarray is not None:
            # WORKAROUND: save to file and reread to get the array shape needed for prediction
            audioabspath = audioheaderpath + "_" + key + fileextension
            print(f"Audio at path {audioabspath}")
            sf.write(audioabspath, audioarray[1], audioarray[0])
            audiofiles.append(audioabspath)
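
    # predictfiles returns a dict of lists ("predicted_text", "corrected_text"),
    # one entry per input file, in the order the files were appended above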
    predictiontexts = predictor.predictfiles(audiofiles)

    mictext = predictiontexts["predicted_text"][0] + "\n" + \
        predictiontexts["corrected_text"][0] if dictinput["mic"] is not None else ""
    filetext = predictiontexts["predicted_text"][-1] + "\n" + \
        predictiontexts["corrected_text"][-1] if dictinput["file"] is not None else ""

    return [mictext, filetext]
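

# Two inputs (microphone recording and file upload) map to args[0] and args[1]
# in greet(); the two text outputs show the mic and file transcriptions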
demo = gr.Interface(fn=greet,
                    inputs=["mic", "audio"],
                    outputs=["text", "text"],
                    title="Speech-to-Text",
                    examples=[audiofileexamples])
demo.launch()  # pass share=True to create a public link