Spaces:
Runtime error
Runtime error
File size: 2,014 Bytes
ca62577 1358486 ca62577 d675859 ca62577 d675859 ca62577 1358486 ca62577 1358486 3d46958 ca62577 1358486 3d46958 ca62577 1358486 ca62577 1358486 ca62577 d675859 bbe5135 d675859 ca62577 d675859 ca62577 d675859 1358486 d675859 1358486 d675859 ca62577 d675859 ca62577 d675859 bbe5135 d675859 ca62577 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
from transformers import Wav2Vec2Processor
from transformers import AutoModelForCTC
from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
from conversationalnlp.models.wav2vec2 import ModelLoader
from conversationalnlp.utils import *
import soundfile as sf
import os
"""
Run the Gradio app with:
>> python app.py
"""
# --- Static configuration ---------------------------------------------------
# Path *prefix* (not a directory) for the temporary audio files written by
# `greet`: "<cwd>/temp" + "_" + <input name> + <fileextension>.
audioheaderpath = os.path.join(os.getcwd(), "temp")
fileextension = ".wav"

# Example audio files offered in the Gradio UI.
audiofileexamples = ["example1.flac", "example2.flac"]

# --- Speech-to-text model ---------------------------------------------------
# Processor and CTC model are both loaded from the same pretrained checkpoint,
# then wrapped by the conversationalnlp helpers used for prediction.
pretrained_model = "codenamewei/speech-to-text"
processor = Wav2Vec2Processor.from_pretrained(pretrained_model)
model = AutoModelForCTC.from_pretrained(pretrained_model)
modelloader = ModelLoader(model, processor)
predictor = Wav2Vec2Predict(modelloader)
def greet(*args):
    """
    Transcribe the microphone recording and/or the uploaded audio file.

    Parameters
    ----------
    *args
        args[0] is the microphone input and args[1] is the audio-file input.
        Each is either None (nothing supplied) or a pair
        (sample_rate: int, samples: np.array).

    Returns
    -------
    list of str
        [mictext, filetext]. Each entry is the predicted text, a newline and
        the corrected text for that input, or "" when that input was not
        supplied.
    """
    dictinput = dict(mic=args[0], file=args[1])

    audiofiles = []
    for key, audioarray in dictinput.items():
        if audioarray is None:
            continue
        # WORKAROUND: Save to file and reread to get the array shape needed
        # for prediction
        audioabspath = audioheaderpath + "_" + key + fileextension
        print(f"Audio at path {audioabspath}")
        # audioarray is (sample_rate, samples); sf.write takes data first.
        sf.write(audioabspath, audioarray[1], audioarray[0])
        audiofiles.append(audioabspath)

    # Nothing was recorded or uploaded: skip the predictor instead of calling
    # it with an empty file list.
    if not audiofiles:
        return ["", ""]

    predictiontexts = predictor.predictfiles(audiofiles)
    predicted = predictiontexts["predicted_text"]
    corrected = predictiontexts["corrected_text"]

    # Files were appended in dict order (mic, then file), so when the mic
    # input exists it is the first result, and when the file input exists it
    # is the last result.
    if dictinput["mic"] is not None:
        mictext = predicted[0] + "\n" + corrected[0]
    else:
        mictext = ""

    if dictinput["file"] is not None:
        filetext = predicted[-1] + "\n" + corrected[-1]
    else:
        filetext = ""

    return [mictext, filetext]
# Two audio inputs (microphone, uploaded file) mapped to two text outputs
# (one transcript per input); `audiofileexamples` supplies one example row.
demo = gr.Interface(
    fn=greet,
    inputs=["mic", "audio"],
    outputs=["text", "text"],
    title="Speech-to-Text",
    examples=[audiofileexamples],
)

# Pass share=True to launch() to also create a public share link.
demo.launch()
|