T.Masuda committed on
Commit
c4d1e20
·
1 Parent(s): ed89640

create app

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +55 -0
  3. requirements.txt +5 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Openai Whisper Demo
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
 
1
  ---
2
+ title: OpenAI Whisper Demo
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
app.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import pipeline
4
+
5
+ device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
6
+ pipe = pipeline('automatic-speech-recognition', model='openai/whisper-large', chunk_length_s=30, device=device)
7
+
8
+ def speech_to_text(mic, upload, state):
9
+ if state['active'] == 1:
10
+ audio = upload
11
+ else:
12
+ audio = mic
13
+
14
+ if audio is None:
15
+ yield None
16
+ return
17
+
18
+ prediction = pipe(audio, batch_size=8, generate_kwargs={'task': 'transcribe'}, return_timestamps=True)
19
+ yield prediction['text']
20
+
21
+ def tab_select(evt: gr.SelectData, state):
22
+ state['active'] = evt.target.id
23
+ print('select {}'.format(evt.target.id))
24
+ return state
25
+
26
+ with gr.Blocks(title='OpenAI Whisper Demo') as app:
27
+ state = gr.State({ 'active': 0 })
28
+
29
+ gr.Markdown('''
30
+ # OpenAI Whisper Demo
31
+ ''')
32
+ with gr.Row():
33
+ with gr.Column():
34
+ inputs = []
35
+ with gr.Tab(label='microphone', id=0) as tab1:
36
+ mic = gr.Audio(source='microphone', type='filepath')
37
+ inputs.append(mic)
38
+ tab1.select(tab_select, inputs=state, outputs=state)
39
+ with gr.Tab(label='upload', id=1) as tab2:
40
+ upload = gr.Audio(source='upload', type='filepath')
41
+ inputs.append(upload)
42
+ tab2.select(tab_select, inputs=state, outputs=state)
43
+ with gr.Row():
44
+ with gr.Column(min_width=160):
45
+ clearBtn = gr.ClearButton()
46
+ with gr.Column(min_width=160):
47
+ btn = gr.Button(value='Submit')
48
+ with gr.Row():
49
+ with gr.Column():
50
+ outputs = [gr.Textbox(label='output')]
51
+ clearBtn.add(inputs + outputs)
52
+ btn.click(speech_to_text, inputs=inputs + [state], outputs=outputs)
53
+
54
+ app.queue(concurrency_count=20)
55
+ app.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio
2
+ torch
3
+ torchvision
4
+ torchaudio
5
+ transformers