T.Masuda committed on
Commit
c4d1e20
·
1 Parent(s): ed89640

create app

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +55 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Openai Whisper Demo
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
 
1
  ---
2
+ title: OpenAI Whisper Demo
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+
5
+ device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
6
+ pipe = pipeline('automatic-speech-recognition', model='openai/whisper-large', chunk_length_s=30, device=device)
7
+
8
+ def speech_to_text(mic, upload, state):
9
+ if state['active'] == 1:
10
+ audio = upload
11
+ else:
12
+ audio = mic
13
+
14
+ if audio is None:
15
+ yield None
16
+ return
17
+
18
+ prediction = pipe(audio, batch_size=8, generate_kwargs={'task': 'transcribe'}, return_timestamps=True)
19
+ yield prediction['text']
20
+
21
+ def tab_select(evt: gr.SelectData, state):
22
+ state['active'] = evt.target.id
23
+ print('select {}'.format(evt.target.id))
24
+ return state
25
+
26
+ with gr.Blocks(title='OpenAI Whisper Demo') as app:
27
+ state = gr.State({ 'active': 0 })
28
+
29
+ gr.Markdown('''
30
+ # OpenAI Whisper Demo
31
+ ''')
32
+ with gr.Row():
33
+ with gr.Column():
34
+ inputs = []
35
+ with gr.Tab(label='microphone', id=0) as tab1:
36
+ mic = gr.Audio(source='microphone', type='filepath')
37
+ inputs.append(mic)
38
+ tab1.select(tab_select, inputs=state, outputs=state)
39
+ with gr.Tab(label='upload', id=1) as tab2:
40
+ upload = gr.Audio(source='upload', type='filepath')
41
+ inputs.append(upload)
42
+ tab2.select(tab_select, inputs=state, outputs=state)
43
+ with gr.Row():
44
+ with gr.Column(min_width=160):
45
+ clearBtn = gr.ClearButton()
46
+ with gr.Column(min_width=160):
47
+ btn = gr.Button(value='Submit')
48
+ with gr.Row():
49
+ with gr.Column():
50
+ outputs = [gr.Textbox(label='output')]
51
+ clearBtn.add(inputs + outputs)
52
+ btn.click(speech_to_text, inputs=inputs + [state], outputs=outputs)
53
+
54
+ app.queue(concurrency_count=20)
55
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ torchvision
4
+ torchaudio
5
+ transformers