YosefAyele committed on
Commit
5d5905f
·
1 Parent(s): 2f0ad31

add gradio app

Browse files
Files changed (2) hide show
  1. app.py +48 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from speechbrain.inference.ASR import EncoderASR
3
+ from pydub import AudioSegment
4
+ import os
5
+
6
# Instantiate the speech recognizer once at import time so that every request
# handled by the app reuses the same model object. The checkpoint files are
# stored under the given ``savedir``.
asr_model = EncoderASR.from_hparams(
    savedir="pretrained_models/asr-wav2vec2-amharic",
    source="YosefA/wave2vec2_amharic_stt",
)

# Scratch directory for the WAV files produced while converting uploads;
# ``exist_ok`` keeps restarts from failing when it is already present.
os.makedirs("temp_audio", exist_ok=True)
14
+
15
def transcribe_audio(audio_file):
    """Convert an uploaded audio file to WAV and return its transcription.

    The upload is decoded with pydub, re-encoded as WAV into a uniquely named
    scratch file under ``temp_audio``, and handed to the module-level
    ``asr_model``. The scratch file is always removed before returning, even
    when decoding or transcription fails.

    Args:
        audio_file: Filesystem path of the uploaded audio, or ``None`` / an
            empty value when nothing was uploaded.

    Returns:
        Whatever ``asr_model.transcribe_file`` returns for the converted
        file, or an empty string when no file was supplied.

    Raises:
        Any exception raised by pydub while decoding/exporting or by the
        model while transcribing is propagated to the caller.
    """
    # Imported locally so the block does not depend on a file-level import.
    import tempfile

    # The button can be clicked before anything is uploaded; there is
    # nothing to transcribe in that case.
    if not audio_file:
        return ""

    # Make sure the scratch directory is still there (it may have been
    # cleaned up since start-up).
    os.makedirs("temp_audio", exist_ok=True)

    # A unique path per call: a shared fixed filename would let concurrent
    # requests overwrite or delete each other's audio.
    fd, temp_audio_path = tempfile.mkstemp(suffix=".wav", dir="temp_audio")
    os.close(fd)

    try:
        # Convert audio to .wav format. ``export`` hands back the open
        # output file, so close it explicitly instead of leaking the handle.
        sound = AudioSegment.from_file(audio_file)
        sound.export(temp_audio_path, format="wav").close()

        # Transcribe the converted audio.
        return asr_model.transcribe_file(temp_audio_path)
    finally:
        # Remove the scratch file on success and on failure alike.
        os.remove(temp_audio_path)
33
+
34
# Build the web UI: a heading, a short instruction, an upload widget next to
# the output textbox, and a button that triggers the transcription.
with gr.Blocks() as app:
    gr.Markdown("### Amharic Speech-to-Text Transcription App")
    gr.Markdown("Upload an audio file in any format, and get its transcription.")

    # Input and output sit side by side in a single row.
    with gr.Row():
        audio_input = gr.File(label="Upload Audio File", type="filepath")
        transcription_output = gr.Textbox(label="Transcription")

    transcribe_button = gr.Button("Transcribe")
    # Clicking the button sends the uploaded file to ``transcribe_audio`` and
    # shows the returned value in the textbox.
    transcribe_button.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=transcription_output,
    )

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ speechbrain
3
+ gradio
4
+ pydub