YosefAyele committed on
Commit
a17627f
·
1 Parent(s): 5d5905f

make it possible to record audio from within the app

Browse files
Files changed (1) hide show
  1. app.py +24 -2
app.py CHANGED
@@ -2,6 +2,9 @@ import gradio as gr
2
  from speechbrain.inference.ASR import EncoderASR
3
  from pydub import AudioSegment
4
  import os
 
 
 
5
 
6
  # Load the ASR model
7
  asr_model = EncoderASR.from_hparams(
@@ -31,13 +34,32 @@ def transcribe_audio(audio_file):
31
 
32
  return transcription
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Define the Gradio interface
35
  with gr.Blocks() as app:
36
  gr.Markdown("### Amharic Speech-to-Text Transcription App")
37
- gr.Markdown("Upload an audio file in any format, and get its transcription.")
38
 
39
  with gr.Row():
40
- audio_input = gr.File(label="Upload Audio File", type="filepath")
41
  transcription_output = gr.Textbox(label="Transcription")
42
 
43
  transcribe_button = gr.Button("Transcribe")
 
2
  from speechbrain.inference.ASR import EncoderASR
3
  from pydub import AudioSegment
4
  import os
5
+ import numpy as np
6
+ from scipy.io.wavfile import write
7
+
8
 
9
  # Load the ASR model
10
  asr_model = EncoderASR.from_hparams(
 
34
 
35
  return transcription
36
 
37
def save_audio_to_file(audio_data, file_path="temp_audio/input_audio.wav"):
    """
    Write an (audio array, sample rate) pair to a .wav file.

    Args:
        audio_data: A two-item sequence unpacked as ``(audio_array, sample_rate)``.
            Floating-point samples are treated as being in the range [-1.0, 1.0]
            and are converted to 16-bit PCM. Integer samples are treated as
            already being PCM and are written without rescaling.
        file_path: Destination of the .wav file. Missing parent directories
            are created.

    Returns:
        ``file_path``, unchanged, so the caller can hand it to the next step.
    """
    audio_array, sample_rate = audio_data
    samples = np.asarray(audio_array)

    if np.issubdtype(samples.dtype, np.floating):
        # Clip before scaling: a sample slightly outside [-1, 1] would
        # otherwise wrap around when cast to int16 and produce a loud click.
        samples = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    # Integer input is left alone: multiplying PCM samples by 32767 would
    # overflow int16 and turn the recording into noise.

    # A bare filename has an empty dirname, and os.makedirs("") raises.
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    write(file_path, sample_rate, samples)
    return file_path
46
+
47
def process_audio(audio_data):
    """
    Transcribe recorded or uploaded audio.

    Args:
        audio_data: Either a path to an audio file (``str`` or ``os.PathLike``)
            or an ``(audio_array, sample_rate)`` pair. A pair is first written
            to a .wav file by ``save_audio_to_file``.

    Returns:
        The value returned by ``transcribe_audio`` for the audio file.
    """
    if isinstance(audio_data, (str, os.PathLike)):
        # The audio is already on disk (the gr.Audio input in this app is
        # declared with type="filepath"), so there is nothing to unpack or
        # re-encode; pass the path straight through.
        temp_audio_path = audio_data
    else:
        temp_audio_path = save_audio_to_file(audio_data)

    return transcribe_audio(temp_audio_path)
54
+
55
+
56
  # Define the Gradio interface
57
  with gr.Blocks() as app:
58
  gr.Markdown("### Amharic Speech-to-Text Transcription App")
59
+ gr.Markdown("Upload or record an audio file in any format, and get its transcription.")
60
 
61
  with gr.Row():
62
+ audio_input = gr.Audio(label="Upload or Record Audio", type="filepath")
63
  transcription_output = gr.Textbox(label="Transcription")
64
 
65
  transcribe_button = gr.Button("Transcribe")