kmknair committed
Commit 478bedf · 1 Parent(s): 2c3f8ff

tabbed app add

Files changed (1)
1. app.py  +66 -6
app.py CHANGED
@@ -4,17 +4,35 @@ import time
 
 # p = pipeline("automatic-speech-recognition", model="/Users/mkesavan/aidev/speechAI-trials/xlsr-wave2vec/wav2vec2-large-xls-r-300m-tamil-colab/checkpoint-1600")
 
-p = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-tamil")
+# combining Tamil and arabic
 
-def transcribe(audio, state=""):
+p_ta = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-tamil")
+p_ar = pipeline("automatic-speech-recognition", model="kmknair/wav2vec2-xlsr-arabic")
+
+
+def transcribe_ta(audio):
+    text = p_ta(audio)["text"]
+    return text
+
+def transcribe_ta_stream(audio, state=""):
     time.sleep(2)
-    text = p(audio)["text"]
+    text = p_ta(audio)["text"]
     state += text + " "
     return state, state
 
+def transcribe_ar(audio):
+    text = p_ar(audio)["text"]
+    return text
+
+def transcribe_ar_stream(audio, state=""):
+    time.sleep(2)
+    text = p_ar(audio)["text"]
+    state += text + " "
+    return state, state
 
-gr.Interface(
-    fn=transcribe,
+# transcribe Tamil stream
+ta_tr_stream_tab = gr.Interface(
+    fn=transcribe_ta_stream,
     inputs=[
         gr.Audio(source="microphone", type="filepath", streaming=True),
         "state"
@@ -23,4 +41,46 @@ gr.Interface(
         "textbox",
         "state"
     ],
-    live=True).launch()
+    live=True)
+# transcribe Arabic stream
+ar_tr_stream_tab = gr.Interface(
+    fn=transcribe_ar_stream,
+    inputs=[
+        gr.Audio(source="microphone", type="filepath", streaming=True),
+        "state"
+    ],
+    outputs=[
+        "textbox",
+        "state"
+    ],
+    live=True)
+
+# transcribe Tamil file
+ta_tr_file_tab = gr.Interface(
+    fn=transcribe_ta,
+    inputs=[
+        gr.Audio(source="microphone", type="filepath")
+    ],
+    outputs="text")
+
+# transcribe Arabic file
+ar_tr_file_tab = gr.Interface(
+    fn=transcribe_ar,
+    inputs=[
+        gr.Audio(source="microphone", type="filepath")
+    ],
+    outputs="text")
+
+
+tabs = gr.TabbedInterface(
+    [ta_tr_stream_tab, ar_tr_stream_tab,ta_tr_file_tab, ar_tr_file_tab],
+    [
+        "Tamil Live Transcription",
+        "Arabic Live Transcription",
+        "Tamil File Transcription",
+        "Arabic File Transcription",
+    ]
+)
+
+if __name__ == "__main__":
+    tabs.launch()
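
For readers unfamiliar with Gradio's "state" shorthand used in the streaming interfaces above: on each microphone chunk, the current transcript comes in as state, the chunk's recognized text is appended, and the same string is returned twice, once for the visible textbox and once to seed the next call. The minimal sketch below shows only that call pattern; fake_chunks is hypothetical stand-in data, and no Gradio or model call is involved.

def transcribe_stream(chunk_text, state=""):
    # Same shape as transcribe_ta_stream / transcribe_ar_stream in the commit:
    # append the latest chunk's text, return (value to display, next state).
    state += chunk_text + " "
    return state, state

fake_chunks = ["vanakkam", "eppadi", "irukkinga"]  # hypothetical recognized chunks
state = ""
for chunk in fake_chunks:
    shown, state = transcribe_stream(chunk, state)
    print(shown)
# vanakkam
# vanakkam eppadi
# vanakkam eppadi irukkinga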
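Since the Tamil and Arabic code paths added here differ only in the pipeline object and the tab labels, the same four tabs could also be built from a small helper. The sketch below is a hypothetical refactor, not part of this commit; make_tabs is an invented name, and it reuses only the Gradio 3.x calls already present in the diff (gr.Interface, gr.Audio(source=...), gr.TabbedInterface).

import time

import gradio as gr
from transformers import pipeline

def make_tabs(model_id):
    # Build the streaming and file-upload interfaces for one language.
    p = pipeline("automatic-speech-recognition", model=model_id)

    def transcribe(audio):
        return p(audio)["text"]

    def transcribe_stream(audio, state=""):
        time.sleep(2)                      # same pacing delay as the commit
        state += p(audio)["text"] + " "    # accumulate partial transcripts
        return state, state

    stream_tab = gr.Interface(
        fn=transcribe_stream,
        inputs=[gr.Audio(source="microphone", type="filepath", streaming=True), "state"],
        outputs=["textbox", "state"],
        live=True)
    file_tab = gr.Interface(
        fn=transcribe,
        inputs=[gr.Audio(source="microphone", type="filepath")],
        outputs="text")
    return stream_tab, file_tab

ta_stream, ta_file = make_tabs("kmknair/wav2vec2-xlsr-tamil")
ar_stream, ar_file = make_tabs("kmknair/wav2vec2-xlsr-arabic")

tabs = gr.TabbedInterface(
    [ta_stream, ar_stream, ta_file, ar_file],
    ["Tamil Live Transcription", "Arabic Live Transcription",
     "Tamil File Transcription", "Arabic File Transcription"])

if __name__ == "__main__":
    tabs.launch()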