Spaces:
Runtime error
Runtime error
Commit
·
a17627f
1
Parent(s):
5d5905f
make it possible to record audio from within the app
Browse files
app.py
CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
|
|
2 |
from speechbrain.inference.ASR import EncoderASR
|
3 |
from pydub import AudioSegment
|
4 |
import os
|
|
|
|
|
|
|
5 |
|
6 |
# Load the ASR model
|
7 |
asr_model = EncoderASR.from_hparams(
|
@@ -31,13 +34,32 @@ def transcribe_audio(audio_file):
|
|
31 |
|
32 |
return transcription
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# Define the Gradio interface
|
35 |
with gr.Blocks() as app:
|
36 |
gr.Markdown("### Amharic Speech-to-Text Transcription App")
|
37 |
-
gr.Markdown("Upload an audio file in any format, and get its transcription.")
|
38 |
|
39 |
with gr.Row():
|
40 |
-
audio_input = gr.
|
41 |
transcription_output = gr.Textbox(label="Transcription")
|
42 |
|
43 |
transcribe_button = gr.Button("Transcribe")
|
|
|
2 |
from speechbrain.inference.ASR import EncoderASR
|
3 |
from pydub import AudioSegment
|
4 |
import os
|
5 |
+
import numpy as np
|
6 |
+
from scipy.io.wavfile import write
|
7 |
+
|
8 |
|
9 |
# Load the ASR model
|
10 |
asr_model = EncoderASR.from_hparams(
|
|
|
34 |
|
35 |
return transcription
|
36 |
|
37 |
+
def save_audio_to_file(audio_data, file_path="temp_audio/input_audio.wav"):
    """
    Write in-memory audio to a .wav file and return the path written.

    Float samples are converted to 16-bit PCM; integer samples are written
    unchanged.

    Args:
        audio_data: A two-element tuple holding a sample rate and a numpy
            array of samples. Gradio's ``type="numpy"`` audio component
            supplies ``(sample_rate, array)``; the reversed
            ``(array, sample_rate)`` order is accepted as well.
        file_path: Destination path. Missing parent directories are created.

    Returns:
        The ``file_path`` that was written.
    """
    first, second = audio_data
    # Tell the two orderings apart by which element is the scalar sample rate.
    if np.ndim(first) == 0:
        sample_rate, audio_array = first, second
    else:
        audio_array, sample_rate = first, second
    audio_array = np.asarray(audio_array)

    if np.issubdtype(audio_array.dtype, np.floating):
        # Float audio is expected in [-1.0, 1.0]; clip first so out-of-range
        # samples saturate instead of wrapping around when cast to int16.
        samples = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)
    else:
        # Integer PCM is already at its final scale; multiplying it by 32767
        # would overflow and corrupt the signal.
        samples = audio_array

    # A bare filename has an empty dirname, which os.makedirs rejects.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    write(file_path, int(sample_rate), samples)
    return file_path
|
46 |
+
|
47 |
+
def process_audio(audio_data):
    """
    Transcribe recorded or uploaded audio coming from the Gradio audio input.

    Args:
        audio_data: One of
            - a path to an audio file (what ``gr.Audio`` passes when it is
              created with ``type="filepath"``),
            - in-memory audio in the tuple form ``save_audio_to_file`` accepts,
            - ``None`` when no audio was supplied.

    Returns:
        The text returned by ``transcribe_audio``, or a short notice when no
        audio was supplied.
    """
    if audio_data is None:
        return "No audio provided. Please upload or record audio first."

    if isinstance(audio_data, (str, os.PathLike)):
        # Already a file on disk; unpacking it as array data would fail.
        temp_audio_path = audio_data
    else:
        temp_audio_path = save_audio_to_file(audio_data)

    return transcribe_audio(temp_audio_path)
|
54 |
+
|
55 |
+
|
56 |
# Define the Gradio interface
|
57 |
with gr.Blocks() as app:
|
58 |
gr.Markdown("### Amharic Speech-to-Text Transcription App")
|
59 |
+
gr.Markdown("Upload or record an audio file in any format, and get its transcription.")
|
60 |
|
61 |
with gr.Row():
|
62 |
+
audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
|
63 |
transcription_output = gr.Textbox(label="Transcription")
|
64 |
|
65 |
transcribe_button = gr.Button("Transcribe")
|