Spaces:
Sleeping
Sleeping
import gradio as gr | |
import torch | |
from transformers import pipeline | |
# Whisper checkpoint to load. 'openai/whisper-large-v2' is the alternative
# checkpoint that was previously selected here.
modelname = 'openai/whisper-small'

# Run on the first CUDA device when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Speech-recognition pipeline, built once at module import and used by
# speech_to_text for every request.
pipe = pipeline(
    'automatic-speech-recognition',
    model=modelname,
    chunk_length_s=30,
    device=device,
)
def speech_to_text(mic, upload, state):
    """Transcribe the audio belonging to the currently active input.

    Args:
        mic: Value of the microphone audio input, or None when empty.
        upload: Value of the upload audio input, or None when empty.
        state: Dict whose ``'active'`` entry chooses the source: ``1`` picks
            ``upload``, any other value picks ``mic``.

    Yields:
        ``None`` when the chosen input is empty; otherwise the ``'text'``
        entry of the result returned by the module-level ``pipe``.
    """
    audio = upload if state['active'] == 1 else mic

    if audio is not None:
        result = pipe(
            audio,
            batch_size=8,
            generate_kwargs={'task': 'transcribe'},
            return_timestamps=True,
        )
        yield result['text']
    else:
        # Nothing to transcribe: emit a single None so the output is blanked.
        yield None
def tab_select(evt: gr.SelectData, state):
    """Record the index carried by a tab select event.

    Args:
        evt: Select event data; only its ``index`` attribute is read.
        state: Dict whose ``'active'`` entry is overwritten in place.

    Returns:
        The same ``state`` dict, after the update.
    """
    selected = evt.index
    state['active'] = selected
    print(f'select {selected}')
    return state
with gr.Blocks(title='OpenAI Whisper Demo') as app:
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation had been stripped -- confirm the row/column layout.

    # Holds which input tab is active; tab_select rewrites the 'active' entry.
    state = gr.State({'active': 0})

    gr.Markdown('\n# OpenAI Whisper Demo\n')

    # Input area: one tab per audio source. Selecting a tab updates `state`.
    with gr.Row():
        with gr.Column():
            with gr.Tab('microphone') as tab1:
                mic = gr.Audio(sources='microphone', type='filepath')
                tab1.select(tab_select, inputs=state, outputs=state)
            with gr.Tab('upload') as tab2:
                upload = gr.Audio(sources='upload', type='filepath')
                tab2.select(tab_select, inputs=state, outputs=state)
            inputs = [mic, upload]

    # Action buttons, side by side.
    with gr.Row():
        with gr.Column(min_width=160):
            clearBtn = gr.ClearButton()
        with gr.Column(min_width=160):
            btn = gr.Button(value='Submit')

    # Transcription result.
    with gr.Row():
        with gr.Column():
            transcript = gr.Textbox(label='output')
            outputs = [transcript]

    # The clear button resets both audio inputs and the output textbox.
    clearBtn.add([mic, upload, transcript])

    # Argument order matches speech_to_text(mic, upload, state).
    btn.click(
        speech_to_text,
        inputs=[mic, upload, state],
        outputs=outputs,
        concurrency_limit=20,
    )

app.launch()