Spaces:
Sleeping
Sleeping
File size: 1,657 Bytes
c4d1e20 2dbcc16 c4d1e20 2dbcc16 c4d1e20 8a11b0e c4d1e20 8a11b0e aba6d47 c4d1e20 8a11b0e aba6d47 c4d1e20 ed5246e c4d1e20 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import gradio as gr
import torch
from transformers import pipeline
# Alternative, higher-accuracy (and much heavier) checkpoint — left here for easy swap:
#modelname = 'openai/whisper-large-v2'
modelname = 'openai/whisper-small'
# Prefer the first CUDA GPU when available; otherwise fall back to CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# Module-level ASR pipeline shared by all requests.  chunk_length_s=30 enables
# chunked long-form transcription (Whisper's native window is 30 s).
# NOTE(review): downloading the checkpoint happens at import time — confirm
# that is acceptable for the deployment environment.
pipe = pipeline('automatic-speech-recognition', model=modelname, chunk_length_s=30, device=device)
def speech_to_text(mic, upload, state):
    """Transcribe audio from whichever input tab is currently active.

    Generator used as a Gradio event handler: yields ``None`` when the
    active tab holds no audio, otherwise yields the transcript text from
    the module-level Whisper pipeline.

    mic / upload -- filepaths (or None) from the two gr.Audio components
    state        -- session dict; state['active'] == 1 selects the upload tab,
                    any other value selects the microphone tab
    """
    # Pick the audio source that matches the selected tab.
    source = upload if state['active'] == 1 else mic
    if source is None:
        # Nothing recorded/uploaded on the active tab — clear the output.
        yield None
        return
    result = pipe(source, batch_size=8, generate_kwargs={'task': 'transcribe'}, return_timestamps=True)
    yield result['text']
def tab_select(evt: gr.SelectData, state):
    """Record the index of the tab the user just selected into shared state.

    Gradio Tab.select handler: evt.index is 0 for the microphone tab and 1
    for the upload tab; speech_to_text later reads state['active'] to decide
    which audio input to transcribe.
    """
    selected = evt.index
    state['active'] = selected
    print('select {}'.format(selected))
    return state
# ---------------------------------------------------------------------------
# UI layout and event wiring.
# NOTE(review): the scrape this file came from lost all indentation; the
# nesting below is the conventional reconstruction — confirm against the
# original layout.
# ---------------------------------------------------------------------------
with gr.Blocks(title='OpenAI Whisper Demo') as app:
    # Per-session state: 'active' is the index of the selected input tab
    # (0 == microphone, 1 == upload), kept in sync by tab_select.
    state = gr.State({ 'active': 0 })
    gr.Markdown('''
    # OpenAI Whisper Demo
    ''')
    with gr.Row():
        with gr.Column():
            # Both audio components are passed to speech_to_text; it picks
            # the one matching the active tab.
            inputs = []
            with gr.Tab('microphone') as tab1:
                mic = gr.Audio(sources='microphone', type='filepath')
                inputs.append(mic)
                # Selecting this tab stores its index (0) in state['active'].
                tab1.select(tab_select, inputs=state, outputs=state)
            with gr.Tab('upload') as tab2:
                upload = gr.Audio(sources='upload', type='filepath')
                inputs.append(upload)
                # Selecting this tab stores its index (1) in state['active'].
                tab2.select(tab_select, inputs=state, outputs=state)
            with gr.Row():
                with gr.Column(min_width=160):
                    clearBtn = gr.ClearButton()
                with gr.Column(min_width=160):
                    btn = gr.Button(value='Submit')
    with gr.Row():
        with gr.Column():
            outputs = [gr.Textbox(label='output')]
    # Clear resets both audio inputs and the transcript box.
    clearBtn.add(inputs + outputs)
    # Submit runs speech_to_text(mic, upload, state) -> transcript textbox;
    # up to 20 concurrent transcriptions share the one pipeline.
    btn.click(speech_to_text, inputs=inputs + [state], outputs=outputs, concurrency_limit=20)
app.launch()
|