Baghdad99 committed on
Commit
abd2b24
1 Parent(s): 2983f43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -4
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
 
4
 
5
  # Load the pipeline for speech recognition and translation
6
  pipe = pipeline(
@@ -12,9 +13,13 @@ translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-t
12
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
13
 
14
  # Define the function to translate speech
15
- def translate_speech(audio):
16
- # Separate the sample rate and the audio data
17
- sample_rate, audio_data = audio
 
 
 
 
18
 
19
  # Use the speech recognition pipeline to transcribe the audio
20
  output = pipe(audio_data)
@@ -58,7 +63,7 @@ def translate_speech(audio):
58
  # Define the Gradio interface
59
  iface = gr.Interface(
60
  fn=translate_speech,
61
- inputs=gr.inputs.Audio(source="microphone", type="numpy"),
62
  outputs=gr.outputs.Audio(type="numpy"),
63
  title="Hausa to English Translation",
64
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
4
+ from pydub import AudioSegment
5
 
6
  # Load the pipeline for speech recognition and translation
7
  pipe = pipeline(
 
13
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
14
 
15
  # Define the function to translate speech
16
+ def translate_speech(audio_file):
17
+ # Load the audio file with pydub
18
+ audio = AudioSegment.from_mp3(audio_file.name)
19
+
20
+ # Convert the audio to mono and get the raw data
21
+ audio = audio.set_channels(1)
22
+ audio_data = np.array(audio.get_array_of_samples())
23
 
24
  # Use the speech recognition pipeline to transcribe the audio
25
  output = pipe(audio_data)
 
63
  # Define the Gradio interface
64
  iface = gr.Interface(
65
  fn=translate_speech,
66
+ inputs=gr.inputs.Audio(type="file"), # Change this line
67
  outputs=gr.outputs.Audio(type="numpy"),
68
  title="Hausa to English Translation",
69
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."