T.Masuda committed · c4d1e20
Parent(s): ed89640

create app

Files changed:
- README.md +1 -1
- app.py +55 -0
- requirements.txt +5 -0
README.md CHANGED
@@ -1,5 +1,5 @@
 ---
-title:
+title: OpenAI Whisper Demo
 emoji: π
 colorFrom: green
 colorTo: blue
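For context: this YAML front matter is the block Hugging Face Spaces reads for the Space's card settings. A fuller header for a Gradio Space usually also declares the SDK; the following is a hedged sketch, where sdk, sdk_version, app_file, and pinned are assumptions not present in this commit (a Gradio 3.x version is assumed because app.py uses Gradio 3-era APIs such as gr.Audio(source=...) and queue(concurrency_count=...)):

---
title: OpenAI Whisper Demo
emoji: π
colorFrom: green
colorTo: blue
sdk: gradio            # assumption: this Space runs a Gradio app
sdk_version: 3.35.2    # assumption: any Gradio 3.x release compatible with app.py
app_file: app.py       # assumption: matches the file added in this commit
pinned: false          # assumption: default value
---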
app.py ADDED
@@ -0,0 +1,55 @@
+import gradio as gr
+import torch
+from transformers import pipeline
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+pipe = pipeline('automatic-speech-recognition', model='openai/whisper-large', chunk_length_s=30, device=device)
+
+def speech_to_text(mic, upload, state):
+    if state['active'] == 1:
+        audio = upload
+    else:
+        audio = mic
+
+    if audio is None:
+        yield None
+        return
+
+    prediction = pipe(audio, batch_size=8, generate_kwargs={'task': 'transcribe'}, return_timestamps=True)
+    yield prediction['text']
+
+def tab_select(evt: gr.SelectData, state):
+    state['active'] = evt.target.id
+    print('select {}'.format(evt.target.id))
+    return state
+
+with gr.Blocks(title='OpenAI Whisper Demo') as app:
+    state = gr.State({ 'active': 0 })
+
+    gr.Markdown('''
+    # OpenAI Whisper Demo
+    ''')
+    with gr.Row():
+        with gr.Column():
+            inputs = []
+            with gr.Tab(label='microphone', id=0) as tab1:
+                mic = gr.Audio(source='microphone', type='filepath')
+                inputs.append(mic)
+                tab1.select(tab_select, inputs=state, outputs=state)
+            with gr.Tab(label='upload', id=1) as tab2:
+                upload = gr.Audio(source='upload', type='filepath')
+                inputs.append(upload)
+                tab2.select(tab_select, inputs=state, outputs=state)
+            with gr.Row():
+                with gr.Column(min_width=160):
+                    clearBtn = gr.ClearButton()
+                with gr.Column(min_width=160):
+                    btn = gr.Button(value='Submit')
+    with gr.Row():
+        with gr.Column():
+            outputs = [gr.Textbox(label='output')]
+    clearBtn.add(inputs + outputs)
+    btn.click(speech_to_text, inputs=inputs + [state], outputs=outputs)
+
+app.queue(concurrency_count=20)
+app.launch()
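Usage note: the transcription path in app.py can be exercised without the Gradio UI. Below is a minimal sketch of the same transformers pipeline call, assuming a local audio file sample.wav (a hypothetical placeholder, not part of this commit); openai/whisper-large is a multi-GB checkpoint, so the first run downloads the weights and a GPU is strongly preferable:

import torch
from transformers import pipeline

# Same setup as app.py: use the first GPU if available, split long audio into 30 s chunks.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
pipe = pipeline('automatic-speech-recognition', model='openai/whisper-large',
                chunk_length_s=30, device=device)

# 'sample.wav' is a placeholder path; return_timestamps=True adds per-chunk timings
# alongside the plain transcription available in prediction['text'].
prediction = pipe('sample.wav', batch_size=8,
                  generate_kwargs={'task': 'transcribe'}, return_timestamps=True)
print(prediction['text'])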
requirements.txt ADDED
@@ -0,0 +1,5 @@
+gradio
+torch
+torchvision
+torchaudio
+transformers
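To try the Space locally (assuming a working Python 3 environment), install the dependencies with pip install -r requirements.txt and start the app with python app.py. The app.queue(concurrency_count=20) call lets the request queue process up to 20 jobs in parallel, and app.launch() prints the local URL to open in a browser.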