tonyliu404 committed
Commit 1b91c86 · verified · 1 Parent(s): 849212b

Create app.py

Files changed (1)
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
+ import gradio as gr
+ import numpy as np
+ import librosa
+ import soundfile as sf
+ from transformers import pipeline
+ from IPython.display import Audio as IPythonAudio
+
+ # speech-to-text model
+ asr = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")
+
+ demo = gr.Blocks()
+
+ def transcribe_long_form(filepath):
+     if filepath is None:
+         gr.Warning("No audio found, please retry.")
+         return ""
+     audio, sampling_rate = sf.read(filepath)  # read the recorded/uploaded .wav
+     # collapse to one dimension: stereo audio has two channels, the model expects mono
+     audio_transposed = np.transpose(audio)
+     audio_mono = librosa.to_mono(audio_transposed)
+     IPythonAudio(audio_mono, rate=sampling_rate)  # notebook playback widget; no effect in the app
+
+     # resample to the model's expected 16 kHz sampling rate
+     audio_16KHz = librosa.resample(audio_mono,
+                                    orig_sr=sampling_rate,
+                                    target_sr=16000)
+     output = asr(
+         audio_16KHz,
+         max_new_tokens=256,
+         chunk_length_s=30,  # split long audio into 30-second chunks
+         batch_size=12,
+     )
+     return output["text"]
+
+ mic_transcribe = gr.Interface(
+     fn=transcribe_long_form,
+     inputs=gr.Audio(sources="microphone", type="filepath"),
+     outputs=gr.Textbox(label="Transcription", lines=3),
+     allow_flagging="never",
+ )
+
+ file_transcribe = gr.Interface(
+     fn=transcribe_long_form,
+     inputs=gr.Audio(sources="upload", type="filepath"),
+     outputs=gr.Textbox(label="Transcription", lines=3),
+     allow_flagging="never",
+ )
+
+ with demo:
+     gr.TabbedInterface(
+         [mic_transcribe, file_transcribe],
+         ["Transcribe Microphone", "Transcribe Audio File"],
+     )
+
+ demo.launch()
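
A Space running this app also needs its Python dependencies declared. A minimal requirements.txt sketch, inferred from the imports above (package names only, versions unpinned, and torch assumed as the transformers backend — none of this is part of the commit):

    gradio
    transformers
    torch
    librosa
    soundfile
    numpy
    ipython

soundfile and numpy are pulled in by librosa anyway, but listing them keeps the app's direct imports explicit.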