# NOTE(review): the original file began with "Spaces:" / "Runtime error" /
# "Runtime error" — status text scraped from the hosting (Hugging Face Spaces)
# page, not Python source. Preserved here as a comment so the file parses.
import os

import gradio as gr
import soundfile as sf
from transformers import AutoModelForCTC, Wav2Vec2Processor

from conversationalnlp.models.wav2vec2 import ModelLoader, Wav2Vec2Predict
from conversationalnlp.utils import *

"""
run gradio with
>>python app.py
"""

# Path prefix for temporary audio files written by greet() before prediction
# (e.g. <cwd>/temp_mic.wav, <cwd>/temp_file.wav).
audioheaderpath = os.path.join(os.getcwd(), "temp")

# Hugging Face model id used for both the CTC model and its processor.
pretrained_model = "codenamewei/speech-to-text"

processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)

modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)

# Sample clips shown in the Gradio UI as examples.
audiofileexamples = ["example1.flac", "example2.flac"]

# Extension used for the temporary files written by greet().
fileextension = ".wav"
def greet(*args):
    """Transcribe microphone and/or uploaded audio to text.

    Parameters
    ----------
    args[0] : tuple(int, np.ndarray) or None
        Microphone input as (sample_rate, samples), as provided by Gradio.
    args[1] : tuple(int, np.ndarray) or None
        Uploaded audio file as (sample_rate, samples).

    Returns
    -------
    list of str
        [microphone transcription, file transcription]; an empty string for
        any input that was not provided.
    """
    dictinput = dict(mic=args[0], file=args[1])

    audiofiles = []
    for key, audioarray in dictinput.items():
        if audioarray is not None:
            # WORKAROUND: save to file and reread to get the array shape
            # needed for prediction.
            audioabspath = audioheaderpath + "_" + key + fileextension
            print(f"Audio at path {audioabspath}")
            # Gradio hands us (sample_rate, samples); sf.write takes
            # (path, data, samplerate).
            sf.write(audioabspath, audioarray[1], audioarray[0])
            audiofiles.append(audioabspath)

    # Nothing recorded or uploaded: skip the predictor entirely instead of
    # calling it on an empty file list.
    if not audiofiles:
        return ["", ""]

    predictiontexts = predictor.predictfiles(audiofiles)

    # Mic result (when present) is always first in the batch; the file result
    # is always last, so [-1] is correct whether or not a mic clip preceded it.
    mictext = predictiontexts["predicted_text"][0] + "\n" + \
        predictiontexts["corrected_text"][0] if dictinput['mic'] is not None else ""
    filetext = predictiontexts["predicted_text"][-1] + "\n" + \
        predictiontexts["corrected_text"][-1] if dictinput['file'] is not None else ""

    return [mictext, filetext]
# Two inputs (microphone, audio-file upload) map to greet's *args in order;
# two text outputs mirror the [mictext, filetext] return value.
demo = gr.Interface(
    fn=greet,
    inputs=["mic", "audio"],
    outputs=["text", "text"],
    title="Speech-to-Text",
    examples=[audiofileexamples],
)

demo.launch()  # pass share=True for a public link