codenamewei committed on
Commit
ca62577
·
1 Parent(s): 947d6c6

initial commit

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import Wav2Vec2Processor
3
+ from transformers import AutoModelForCTC
4
+ from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
5
+ from transformers import Wav2Vec2Processor
6
+ from transformers import AutoModelForCTC
7
+ from conversationalnlp.models.wav2vec2 import ModelLoader
8
+ from conversationalnlp.utils import *
9
+ import soundfile as sf
10
+ import os
11
+
12
+ """
13
+ run gradio with
14
+ >>python app.py
15
+ """
16
+
17
+ # audiosavepath = r"C:\Users\codenamewei\Documents\nlp-meeting-data\gradio-inference"
18
+
19
+ # pretrained_model = "codenamewei/speech-to-text"
20
+
21
+ # processor = Wav2Vec2Processor.from_pretrained(
22
+ # pretrained_model, use_auth_token=True)
23
+
24
+ # model = AutoModelForCTC.from_pretrained(
25
+ # pretrained_model,
26
+ # use_auth_token=True)
27
+
28
+ # modelloader = ModelLoader(model, processor)
29
+
30
+ # predictor = Wav2Vec2Predict(modelloader)
31
+
32
+
33
def greet(audioarray):
    """Handle one audio submission from the Gradio interface.

    The wav2vec2 prediction pipeline this function was written for is
    currently disabled (it is kept below as a commented-out reference), so
    the function reports what it received instead of a transcription.
    Previously it returned a name that was never assigned and therefore
    raised ``NameError`` on every call.

    Args:
        audioarray: A ``(sample_rate, samples)`` tuple, for example::

            (16000, array([ -5277184, 326400, -120320, ..., -5970432,
                           -12745216, -6934528], dtype=int32))

            or ``None`` when nothing was recorded or uploaded
            (NOTE(review): confirm the ``None`` case against the Gradio
            version in use).

    Returns:
        A human-readable status string to show in the text output.
    """
    # Guard against a submission without any audio so unpacking cannot fail.
    if audioarray is None:
        return "No audio received."

    samplerate, samples = audioarray

    # Disabled prediction pipeline, kept for reference. It relies on the
    # module-level `audiosavepath` and `predictor`, which are also commented
    # out at the moment:
    #
    # audioabspath = os.path.join(
    #     audiosavepath, customdatetime.getstringdatetime() + ".wav")
    # # WORKAROUND: Save to file and reread to get the array shape needed for prediction
    # sf.write(audioabspath, samples, samplerate)
    #
    # print(f"Audio at path {audioabspath}")
    # predictiontexts = predictor.predictfiles([audioabspath])
    # outputtext = predictiontexts["predicted_text"][-1] + \
    #     "\n" + predictiontexts["corrected_text"][-1]

    # Always bind the result before returning it.
    outputtext = (
        f"Received {len(samples)} samples at {samplerate} Hz; "
        "transcription is currently disabled."
    )

    return outputtext
52
+
53
+
54
# Wire `greet` into a Gradio interface: audio in, text out.
demo = gr.Interface(
    fn=greet,
    inputs="audio",
    outputs="text",
    title="Speech-to-Text",
)

# Start the Gradio server with default settings (share=True was left disabled).
demo.launch()