Pijush2023 committed on
Commit
0f95a25
·
verified ·
1 Parent(s): 7e66356

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -0
app.py CHANGED
@@ -113,6 +113,12 @@ pipe_asr = pipeline(
113
  def handle_voice_to_voice(audio):
114
  # Transcribe audio input to text
115
  sr, y = audio
 
 
 
 
 
 
116
  result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
117
  question = result.get("text", "")
118
 
@@ -123,6 +129,7 @@ def handle_voice_to_voice(audio):
123
  audio_path = generate_audio_elevenlabs(response)
124
  return audio_path
125
 
 
126
  # Define the Gradio interface
127
  with gr.Blocks() as demo:
128
  audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=False, label="Speak to Ask")
 
113
  def handle_voice_to_voice(audio):
114
  # Transcribe audio input to text
115
  sr, y = audio
116
+
117
+ # Ensure that the audio is in float32 format
118
+ y = y.astype(np.float32)
119
+ y = y / np.max(np.abs(y)) # Normalize audio to range [-1.0, 1.0]
120
+
121
+ # Process the audio data with Whisper ASR
122
  result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
123
  question = result.get("text", "")
124
 
 
129
  audio_path = generate_audio_elevenlabs(response)
130
  return audio_path
131
 
132
+
133
  # Define the Gradio interface
134
  with gr.Blocks() as demo:
135
  audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=False, label="Speak to Ask")