Teapack1 committed on
Commit db9a501
1 Parent(s): 4e18b5f

Update app.py

Files changed (1)
  1. app.py +12 -41
app.py CHANGED
@@ -1,48 +1,19 @@
  import gradio as gr
- from pydub import AudioSegment
- from time import sleep

- with gr.Blocks() as demo:
-     input_audio = gr.Audio(label="Input Audio", type="filepath", format="mp3")
-     with gr.Row():
-         with gr.Column():
-             stream_as_file_btn = gr.Button("Stream as File")
-             format = gr.Radio(["wav", "mp3"], value="wav", label="Format")
-             stream_as_file_output = gr.Audio(streaming=True)

-             def stream_file(audio_file, format):
-                 audio = AudioSegment.from_file(audio_file)
-                 i = 0
-                 chunk_size = 1000
-                 while chunk_size * i < len(audio):
-                     chunk = audio[chunk_size * i : chunk_size * (i + 1)]
-                     i += 1
-                     if chunk:
-                         file = f"/tmp/{i}.{format}"
-                         chunk.export(file, format=format)
-                         yield file
-                         sleep(0.5)

-             stream_as_file_btn.click(
-                 stream_file, [input_audio, format], stream_as_file_output
-             )


-         with gr.Column():
-             stream_as_bytes_btn = gr.Button("Stream as Bytes")
-             stream_as_bytes_output = gr.Audio(format="bytes", streaming=True)

-             def stream_bytes(audio_file):
-                 chunk_size = 20_000
-                 with open(audio_file, "rb") as f:
-                     while True:
-                         chunk = f.read(chunk_size)
-                         if chunk:
-                             yield chunk
-                             sleep(1)
-                         else:
-                             break
-             stream_as_bytes_btn.click(stream_bytes, input_audio, stream_as_bytes_output)
-
- if __name__ == "__main__":
-     demo.queue().launch()
 
  import gradio as gr
+ from transformers import pipeline
+ import numpy as np

+ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

+ def transcribe(audio):
+     sr, y = audio
+     y = y.astype(np.float32)
+     y /= np.max(np.abs(y))

+     return transcriber({"sampling_rate": sr, "raw": y})["text"]


+ demo = gr.Interface(
+     fn=transcribe, inputs=gr.Audio(sources="microphone", type="numpy"), outputs="text"
+ )

+ demo.launch()
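
Note on the new handler: with type="numpy", Gradio passes the microphone recording to transcribe() as a (sample_rate, samples) tuple, which the function normalizes to float32 before handing it to the Whisper pipeline. The sketch below shows that same flow outside Gradio; the 16 kHz rate and the synthetic one-second tone are illustrative placeholders, not part of this commit, and the model weights are downloaded on first use.

# Standalone sketch of the transcribe() flow from the new app.py.
import numpy as np
from transformers import pipeline

transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")

sr = 16_000                                                   # placeholder sample rate
t = np.linspace(0, 1, sr, endpoint=False)
y = (0.1 * np.sin(2 * np.pi * 440 * t)).astype(np.float32)    # placeholder 1 s tone

y /= np.max(np.abs(y))                                        # same normalization as the app
print(transcriber({"sampling_rate": sr, "raw": y})["text"])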