Create app.py
app.py
ADDED
@@ -0,0 +1,50 @@
# Imports needed by the code below (the original commit omitted them).
import gradio as gr
import numpy as np
import librosa
import soundfile as sf
from transformers import pipeline

asr = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")  # speech-to-text model

demo = gr.Blocks()

def transcribe_long_form(filepath):
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    audio, sampling_rate = sf.read(filepath)  # read the recorded/uploaded .wav

    # Collapse to mono: stereo files carry two channels, but the ASR model expects
    # a single-channel signal. librosa.to_mono wants shape (channels, samples),
    # while soundfile returns (samples, channels), hence the transpose.
    audio_transposed = np.transpose(audio)
    audio_mono = librosa.to_mono(audio_transposed)
    # IPythonAudio(audio_mono, rate=sampling_rate)  # notebook-only preview; undefined in this script, so left disabled

    # Resample to the 16 kHz rate the model was trained on.
    audio_16KHz = librosa.resample(audio_mono,
                                   orig_sr=sampling_rate,
                                   target_sr=16000)
    output = asr(
        audio_16KHz,
        max_new_tokens=256,
        chunk_length_s=30,  # transcribe long audio in 30-second chunks
        batch_size=12,
    )
    return output["text"]

mic_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="microphone",
                    type="filepath"),
    outputs=gr.Textbox(label="Transcription",
                       lines=3),
    allow_flagging="never")

file_transcribe = gr.Interface(
    fn=transcribe_long_form,
    inputs=gr.Audio(sources="upload",
                    type="filepath"),
    outputs=gr.Textbox(label="Transcription",
                       lines=3),
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe,
         file_transcribe],
        ["Transcribe Microphone",
         "Transcribe Audio File"],
    )

demo.launch()
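Note: this commit adds only app.py. For the Space to build, the packages imported above also have to be declared in a requirements.txt alongside it. A minimal sketch is below (package names only; the file and any version pins are an assumption, not part of this commit):

# requirements.txt (hypothetical companion file for this Space)
transformers
torch
gradio
librosa
soundfile
numpy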