Pijush2023 committed on
Commit 61ae7dd · verified · 1 Parent(s): fb3074d

Update app.py

Files changed (1):
  1. app.py +23 -4
app.py CHANGED
@@ -129,21 +129,40 @@ pipe_asr = pipeline(
 # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
 def transcribe_and_respond(audio):
     if audio is None:
+        logging.error("No audio provided.")
         return None, "No audio provided."
 
     sr, y = audio
     y = np.array(y).astype(np.float32)
 
-    # Transcribe the audio using Whisper
-    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
+    # Normalize the audio array
+    max_abs_y = np.max(np.abs(y))
+    if max_abs_y > 0:
+        y = y / max_abs_y
+
+    # Prepare input_features for Whisper model
+    input_features = processor(y, sampling_rate=sr, return_tensors="pt").input_features
+
+    # Transcribe the audio using Whisper with English language setting
+    result = pipe_asr({"input_features": input_features, "language": "en"}, return_timestamps=False)
     question = result.get("text", "")
 
+    # Log the transcribed text for debugging
+    logging.debug(f"Transcribed text: {question}")
+
     # Retrieve information from Neo4j
     response_text = structured_retriever(question) if question else "I didn't understand the question."
 
     # Convert the response to audio using Eleven Labs TTS
     audio_path = generate_audio_elevenlabs(response_text) if response_text else None
 
+    # Ensure a valid audio path is returned
+    if audio_path and os.path.exists(audio_path):
+        logging.debug(f"Generated audio file path: {audio_path}")
+    else:
+        logging.error("Failed to generate audio or save audio to file.")
+        audio_path = None
+
     return audio_path, response_text
 
 # Function to clear the transcription state
@@ -177,9 +196,9 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     # Clear state interaction
     gr.Button("Clear State").click(
         fn=clear_transcription_state,
-        outputs=[audio_output],
+        outputs=[audio_output, gr.Textbox(label="Transcription")],
         api_name="api_clean_state"
     )
 
 # Launch the Gradio interface
-demo.launch(show_error=True, share=True)
+demo.launch(show_error=True, share=True)
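
A note on the new transcription call: a stock transformers automatic-speech-recognition pipeline expects raw audio plus a sampling rate and takes the language hint through generate_kwargs, rather than pre-computed input_features. A minimal sketch of the conventional invocation, assuming pipe_asr is a standard pipeline("automatic-speech-recognition", ...) wrapping a Whisper checkpoint (y and sr are the normalized waveform and sampling rate from transcribe_and_respond above):

    # Sketch, assuming pipe_asr wraps a Whisper checkpoint; the pipeline runs
    # feature extraction itself, so the raw normalized waveform is passed in.
    result = pipe_asr(
        {"array": y, "sampling_rate": sr},
        return_timestamps=False,
        generate_kwargs={"language": "en"},  # force English decoding
    )
    question = result.get("text", "")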
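The added logging.debug and logging.error calls are only visible if the root logger is configured at DEBUG level or below; Python's root logger emits only WARNING and above by default. A minimal sketch, assuming app.py does not already configure logging elsewhere:

    import logging

    # Assumed setup: without it, the new logging.debug(...) lines are suppressed.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s %(levelname)s %(message)s",
    )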
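On the clear-state wiring: gr.Textbox(label="Transcription") is constructed inline in outputs, which creates a fresh component outside the Blocks layout rather than referencing the textbox rendered on screen. A sketch of the usual pattern, assuming the textbox is defined in the layout up front; transcription_box, the gr.Audio label, and the clear_transcription_state stub are hypothetical:

    import gradio as gr

    def clear_transcription_state():
        # One return value per output component: clear audio, clear text.
        return None, ""

    with gr.Blocks(theme="rawrsor1/Everforest") as demo:
        audio_output = gr.Audio(label="Audio Response")        # label assumed
        transcription_box = gr.Textbox(label="Transcription")

        gr.Button("Clear State").click(
            fn=clear_transcription_state,
            outputs=[audio_output, transcription_box],
            api_name="api_clean_state",
        )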