codenamewei committed on
Commit
1358486
·
1 Parent(s): 50a2354
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +27 -27
  3. requirements.txt +4 -1
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ temp.wav
app.py CHANGED
@@ -1,33 +1,33 @@
1
  import gradio as gr
2
- # from transformers import Wav2Vec2Processor
3
- # from transformers import AutoModelForCTC
4
- # from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
5
- # from transformers import Wav2Vec2Processor
6
- # from transformers import AutoModelForCTC
7
- # from conversationalnlp.models.wav2vec2 import ModelLoader
8
- # from conversationalnlp.utils import *
9
- # import soundfile as sf
10
- # import os
11
 
12
  """
13
  run gradio with
14
  >>python app.py
15
  """
16
 
17
- # audiosavepath = r"C:\Users\codenamewei\Documents\nlp-meeting-data\gradio-inference"
18
 
19
- # pretrained_model = "codenamewei/speech-to-text"
20
 
21
- # processor = Wav2Vec2Processor.from_pretrained(
22
- # pretrained_model, use_auth_token=True)
23
 
24
- # model = AutoModelForCTC.from_pretrained(
25
- # pretrained_model,
26
- # use_auth_token=True)
27
 
28
- # modelloader = ModelLoader(model, processor)
29
 
30
- # predictor = Wav2Vec2Predict(modelloader)
31
 
32
 
33
  def greet(audioarray):
@@ -38,15 +38,15 @@ def greet(audioarray):
38
  -6934528], dtype=int32))
39
  <class 'tuple'>
40
  """
41
- # audioabspath = os.path.join(
42
- # audiosavepath, customdatetime.getstringdatetime() + ".wav")
43
- # # WORKAROUND: Save to file and reread to get the array shape needed for prediction
44
- # sf.write(audioabspath, audioarray[1], audioarray[0])
45
-
46
- # print(f"Audio at path {audioabspath}")
47
- # predictiontexts = predictor.predictfiles([audioabspath])
48
- # outputtext = predictiontexts["predicted_text"][-1] + \
49
- # "\n" + predictiontexts["corrected_text"][-1]
50
 
51
  return outputtext
52
 
 
1
  import gradio as gr
2
+ from transformers import Wav2Vec2Processor
3
+ from transformers import AutoModelForCTC
4
+ from conversationalnlp.models.wav2vec2 import Wav2Vec2Predict
5
+ from transformers import Wav2Vec2Processor
6
+ from transformers import AutoModelForCTC
7
+ from conversationalnlp.models.wav2vec2 import ModelLoader
8
+ from conversationalnlp.utils import *
9
+ import soundfile as sf
10
+ import os
11
 
12
  """
13
  run gradio with
14
  >>python app.py
15
  """
16
 
17
+ audiosavepath = os.getcwd()
18
 
19
+ pretrained_model = "codenamewei/speech-to-text"
20
 
21
+ processor = Wav2Vec2Processor.from_pretrained(
22
+ pretrained_model, use_auth_token=True)
23
 
24
+ model = AutoModelForCTC.from_pretrained(
25
+ pretrained_model,
26
+ use_auth_token=True)
27
 
28
+ modelloader = ModelLoader(model, processor)
29
 
30
+ predictor = Wav2Vec2Predict(modelloader)
31
 
32
 
33
  def greet(audioarray):
 
38
  -6934528], dtype=int32))
39
  <class 'tuple'>
40
  """
41
+ audioabspath = os.path.join(audiosavepath, "temp.wav")
42
+
43
+ # WORKAROUND: Save to file and reread to get the array shape needed for prediction
44
+ sf.write(audioabspath, audioarray[1], audioarray[0])
45
+
46
+ print(f"Audio at path {audioabspath}")
47
+ predictiontexts = predictor.predictfiles([audioabspath])
48
+ outputtext = predictiontexts["predicted_text"][-1] + \
49
+ "\n" + predictiontexts["corrected_text"][-1]
50
 
51
  return outputtext
52
 
requirements.txt CHANGED
@@ -1 +1,4 @@
1
- gradio
 
 
 
 
1
+ gradio
2
+ conversationalnlp
3
+ transformers
4
+ SoundFile