Spaces:

codenamewei
/

speech-to-text

Runtime error

App Files Files Community

codenamewei committed on Jul 2, 2022

Commit

ca62577

1 Parent(s): 947d6c6

initial commit

Browse files

Files changed (1) hide show

app.py +57 -0

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import gradio as gr
+from transformers import Wav2Vec2Processor
+from transformers import AutoModelForCTC
+from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
+from transformers import Wav2Vec2Processor
+from transformers import AutoModelForCTC
+from conversationalnlp.models.wav2vec2 import ModelLoader
+from conversationalnlp.utils import *
+import soundfile as sf
+import os
+"""
+Run the Gradio app with:
+>>python app.py
+"""
+# audiosavepath = r"C:\Users\codenamewei\Documents\nlp-meeting-data\gradio-inference"
+# pretrained_model = "codenamewei/speech-to-text"
+# processor = Wav2Vec2Processor.from_pretrained(
+#     pretrained_model, use_auth_token=True)
+# model = AutoModelForCTC.from_pretrained(
+#     pretrained_model,
+#     use_auth_token=True)
+# modelloader = ModelLoader(model, processor)
+# predictor = Wav2Vec2Predict(modelloader)
+def greet(audioarray):
+    """
+    audio array in the following format
+    (16000, array([ -5277184,    326400,   -120320, ...,  -5970432, -12745216,
+        -6934528], dtype=int32))
+    <class 'tuple'>
+    """
+    # audioabspath = os.path.join(
+    #     audiosavepath, customdatetime.getstringdatetime() + ".wav")
+    # # WORKAROUND: Save to file and reread to get the array shape needed for prediction
+    # sf.write(audioabspath, audioarray[1], audioarray[0])
+    # print(f"Audio at path {audioabspath}")
+    # predictiontexts = predictor.predictfiles([audioabspath])
+    # outputtext = predictiontexts["predicted_text"][-1] + \
+    #     "\n" + predictiontexts["corrected_text"][-1]
+    return outputtext
+demo = gr.Interface(fn=greet, inputs="audio",
+                    outputs="text", title="Speech-to-Text")
+demo.launch()  # share=True)