akki2825 committed on
Commit
b0f1b91
·
1 Parent(s): 249fead

Create run.py

Browse files
Files changed (1) hide show
  1. run.py +57 -0
run.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from deepspeech import Model
2
+ import gradio as gr
3
+ import numpy as np
4
+ import urllib.request
5
+
6
# File names of the DeepSpeech 0.9.3 acoustic model and external scorer; they
# are used both as the remote asset name under ``url`` and as the local path.
model_file_path = "deepspeech-0.9.3-models.pbmm"
lm_file_path = "deepspeech-0.9.3-models.scorer"
url = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/"


def _ensure_downloaded(file_name):
    """Download ``url + file_name`` to ``file_name`` unless it already exists.

    The transfer goes to a temporary ``.part`` file that is renamed only after
    ``urlretrieve`` returns, so an interrupted download is never mistaken for
    a complete file on the next start.
    """
    import os  # local import: keeps this block independent of the file header

    if os.path.exists(file_name):
        return
    partial_path = file_name + ".part"
    urllib.request.urlretrieve(url + file_name, filename=partial_path)
    os.replace(partial_path, file_name)


# Fetch the model assets only when missing instead of on every start.
_ensure_downloaded(model_file_path)
_ensure_downloaded(lm_file_path)

# Decoder settings handed to the model below.
beam_width = 100
lm_alpha = 0.93
lm_beta = 1.18

# Shared model instance used by the transcription callback.
model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
21
+
22
+
23
def reformat_freq(sr, y):
    """Bring an audio chunk to the 16 kHz rate the DeepSpeech model is fed.

    Args:
        sr: Sample rate of ``y`` in Hz. Only 16000 and 48000 are accepted.
        y: Audio samples. Assumed to be a mono, 1-D integer array as delivered
            by the microphone component -- TODO confirm against the caller.

    Returns:
        A ``(sample_rate, samples)`` tuple. 16 kHz input is returned
        unchanged. 48 kHz input is peak-normalised to the int16 range,
        averaged in groups of three samples, and returned as an ``int16``
        array with a sample rate of 16000.

    Raises:
        ValueError: If ``sr`` is neither 16000 nor 48000.
    """
    if sr not in (48000, 16000):
        # Deepspeech only supports 16k, (we convert 48k -> 16k)
        raise ValueError("Unsupported rate", sr)
    if sr == 16000:
        return sr, y

    # Work in float64 so abs() of the most negative int16 cannot overflow.
    samples = np.asarray(y, dtype=np.float64).ravel()

    # Normalise by the largest magnitude, not the largest value: a chunk whose
    # peak is negative would otherwise be scaled far outside the int16 range.
    # The floor of 1 avoids dividing by zero on silence; the size check avoids
    # calling np.max on an empty chunk.
    peak = max(float(np.max(np.abs(samples))), 1.0) if samples.size else 1.0

    # 48k -> 16k averages groups of 3; drop the 0-2 trailing samples that do
    # not fill a whole group instead of failing in reshape.
    usable = samples.size - samples.size % 3
    scaled = samples[:usable] / peak * 32767
    downsampled = scaled.reshape((usable // 3, 3)).mean(axis=1).astype("int16")
    return 16000, downsampled
38
+
39
+
40
def transcribe(speech, stream):
    """Feed one audio chunk into the running decode and return the text so far.

    Args:
        speech: A ``(sample_rate, samples)`` pair; it is unpacked straight
            into ``reformat_freq``.
        stream: The decoding stream carried over from the previous call, or
            ``None`` on the first call, in which case a new one is created
            from the module-level ``model``.

    Returns:
        A ``(text, stream)`` tuple: the intermediate transcript and the stream
        to hand back in on the next call.
    """
    samples = reformat_freq(*speech)[1]
    decoder = model.createStream() if stream is None else stream
    decoder.feedAudioContent(samples)
    return decoder.intermediateDecode(), decoder
47
+
48
+
49
# Live interface: streamed microphone audio plus a "state" slot go in, and the
# transcript plus the updated "state" come out, matching transcribe()'s
# (speech, stream) -> (text, stream) signature.
demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.Audio(source="microphone", streaming=True), "state"],
    outputs=["text", "state"],
    live=True,
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()