Baghdad99 committed on
Commit
ee37b95
·
1 Parent(s): 8a6097b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -14
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
4
- from pydub import AudioSegment
5
 
6
  # Load the pipeline for speech recognition and translation
7
  pipe = pipeline(
@@ -13,15 +12,8 @@ translator = pipeline("text2text-generation", model="Baghdad99/saad-hausa-text-t
13
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
14
 
15
  # Define the function to translate speech
16
- def translate_speech(audio_file):
17
- print(f"Type of audio: {type(audio_file)}, Value of audio: {audio_file}") # Debug line
18
-
19
- # Load the audio file with pydub
20
- audio = AudioSegment.from_mp3(audio_file) # Change this line
21
-
22
- # Convert the audio to mono and get the raw data
23
- audio = audio.set_channels(1)
24
- audio_data = np.array(audio.get_array_of_samples())
25
 
26
  # Use the speech recognition pipeline to transcribe the audio
27
  output = pipe(audio_data)
@@ -65,15 +57,13 @@ def translate_speech(audio_file):
65
 
66
  return 16000, synthesised_speech
67
 
68
-
69
-
70
  # Define the Gradio interface
71
  iface = gr.Interface(
72
  fn=translate_speech,
73
- inputs=gr.inputs.Audio(type="filepath"), # Change this line
74
  outputs=gr.outputs.Audio(type="numpy"),
75
  title="Hausa to English Translation",
76
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
77
  )
78
 
79
- iface.launch()
 
1
  import gradio as gr
2
  from transformers import pipeline, AutoTokenizer
3
  import numpy as np
 
4
 
5
  # Load the pipeline for speech recognition and translation
6
  pipe = pipeline(
 
12
  tts = pipeline("text-to-speech", model="Baghdad99/english_voice_tts")
13
 
14
  # Define the function to translate speech
15
+ def translate_speech(audio_data):
16
+ print(f"Type of audio: {type(audio_data)}, Value of audio: {audio_data}") # Debug line
 
 
 
 
 
 
 
17
 
18
  # Use the speech recognition pipeline to transcribe the audio
19
  output = pipe(audio_data)
 
57
 
58
  return 16000, synthesised_speech
59
 
 
 
60
  # Define the Gradio interface
61
  iface = gr.Interface(
62
  fn=translate_speech,
63
+ inputs=gr.inputs.Audio(source="microphone"), # Change this line
64
  outputs=gr.outputs.Audio(type="numpy"),
65
  title="Hausa to English Translation",
66
  description="Realtime demo for Hausa to English translation using speech recognition and text-to-speech synthesis."
67
  )
68
 
69
+ iface.launch()