Pijush2023 committed on
Commit 61ae7dd · verified · 1 Parent(s): fb3074d

Update app.py

Files changed (1):
  1. app.py +23 -4
app.py CHANGED
@@ -129,21 +129,40 @@ pipe_asr = pipeline(
 # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
 def transcribe_and_respond(audio):
     if audio is None:
+        logging.error("No audio provided.")
         return None, "No audio provided."
 
     sr, y = audio
     y = np.array(y).astype(np.float32)
 
-    # Transcribe the audio using Whisper
-    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
+    # Normalize the audio array
+    max_abs_y = np.max(np.abs(y))
+    if max_abs_y > 0:
+        y = y / max_abs_y
+
+    # Prepare input_features for Whisper model
+    input_features = processor(y, sampling_rate=sr, return_tensors="pt").input_features
+
+    # Transcribe the audio using Whisper with English language setting
+    result = pipe_asr({"input_features": input_features, "language": "en"}, return_timestamps=False)
     question = result.get("text", "")
 
+    # Log the transcribed text for debugging
+    logging.debug(f"Transcribed text: {question}")
+
     # Retrieve information from Neo4j
     response_text = structured_retriever(question) if question else "I didn't understand the question."
 
     # Convert the response to audio using Eleven Labs TTS
     audio_path = generate_audio_elevenlabs(response_text) if response_text else None
 
+    # Ensure a valid audio path is returned
+    if audio_path and os.path.exists(audio_path):
+        logging.debug(f"Generated audio file path: {audio_path}")
+    else:
+        logging.error("Failed to generate audio or save audio to file.")
+        audio_path = None
+
     return audio_path, response_text
 
 # Function to clear the transcription state
@@ -177,9 +196,9 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     # Clear state interaction
     gr.Button("Clear State").click(
         fn=clear_transcription_state,
-        outputs=[audio_output],
+        outputs=[audio_output, gr.Textbox(label="Transcription")],
         api_name="api_clean_state"
     )
 
 # Launch the Gradio interface
-demo.launch(show_error=True, share=True)
+demo.launch(show_error=True, share=True)
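
A note on the new transcription call: a stock transformers automatic-speech-recognition pipeline expects raw audio plus a sampling rate and takes the language hint through generate_kwargs, rather than pre-computed input_features. A minimal sketch of the conventional invocation, assuming pipe_asr is a standard pipeline("automatic-speech-recognition", ...) wrapping a Whisper checkpoint (y and sr are the normalized waveform and sampling rate from transcribe_and_respond above):

    # Sketch, assuming pipe_asr wraps a Whisper checkpoint; the pipeline runs
    # feature extraction itself, so the raw normalized waveform is passed in.
    result = pipe_asr(
        {"array": y, "sampling_rate": sr},
        return_timestamps=False,
        generate_kwargs={"language": "en"},  # force English decoding
    )
    question = result.get("text", "")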
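The added logging.debug and logging.error calls are only visible if the root logger is configured at DEBUG level or below; Python's root logger emits only WARNING and above by default. A minimal sketch, assuming app.py does not already configure logging elsewhere:

    import logging

    # Assumed setup: without it, the new logging.debug(...) lines are suppressed.
    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s %(levelname)s %(message)s",
    )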
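On the clear-state wiring: gr.Textbox(label="Transcription") is constructed inline in outputs, which creates a fresh component outside the Blocks layout rather than referencing the textbox rendered on screen. A sketch of the usual pattern, assuming the textbox is defined in the layout up front; transcription_box, the gr.Audio label, and the clear_transcription_state stub are hypothetical:

    import gradio as gr

    def clear_transcription_state():
        # One return value per output component: clear audio, clear text.
        return None, ""

    with gr.Blocks(theme="rawrsor1/Everforest") as demo:
        audio_output = gr.Audio(label="Audio Response")        # label assumed
        transcription_box = gr.Textbox(label="Transcription")

        gr.Button("Clear State").click(
            fn=clear_transcription_state,
            outputs=[audio_output, transcription_box],
            api_name="api_clean_state",
        )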