Pijush2023 committed on
Commit 35593b5 · verified · 1 Parent(s): 680dd01

Update app.py

Files changed (1)
  1. app.py +14 -30
app.py CHANGED
@@ -133,25 +133,15 @@ pipe_asr = pipeline(
 )
 
 # Function to handle audio input, transcribe, fetch from Neo4j, and generate audio response
-def transcribe_and_respond(stream, new_chunk):
-    try:
-        sr, y = new_chunk[0], new_chunk[1]
-    except TypeError:
-        print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
-        return stream, "", None
-
-    y = y.astype(np.float32)
-    max_abs_y = np.max(np.abs(y))
-    if max_abs_y > 0:
-        y = y / max_abs_y
+def transcribe_and_respond(audio):
+    if audio is None:
+        return None, "No audio provided."
 
-    if stream is not None and len(stream) > 0:
-        stream = np.concatenate([stream, y])
-    else:
-        stream = y
+    sr, y = audio
+    y = np.array(y).astype(np.float32)
 
     # Transcribe the audio using Whisper
-    result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
+    result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
     question = result.get("text", "")
 
     # Retrieve information from Neo4j
@@ -160,20 +150,15 @@ def transcribe_and_respond(stream, new_chunk):
     # Convert the response to audio using Eleven Labs TTS
     audio_path = generate_audio_elevenlabs(response_text) if response_text else None
 
-    return stream, question, audio_path
+    return audio_path, response_text
 
-# Function to clear the transcription state
-def clear_transcription_state():
-    return None, "", None
 
 # Define the Gradio interface with only audio input and output
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     with gr.Row():
         audio_input = gr.Audio(
             sources=["microphone"],
-            streaming=True,
             type='numpy',
-            every=0.1,
             label="Speak to Ask"
         )
         audio_output = gr.Audio(
@@ -183,19 +168,18 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
             interactive=False
         )
 
-    # Speech-to-Text to TTS functionality with Neo4j retrieval
-    state = gr.State()
-    audio_input.stream(
-        transcribe_and_respond,
-        inputs=[state, audio_input],
-        outputs=[state, audio_output],
-        api_name="api_voice_to_neo4j_response"
+    # Submit button to process the audio input
+    submit_btn = gr.Button("Submit")
+    submit_btn.click(
+        fn=transcribe_and_respond,
+        inputs=audio_input,
+        outputs=[audio_output, gr.Textbox(label="Transcription")]
     )
 
     # Clear state interaction
     gr.Button("Clear State").click(
         fn=clear_transcription_state,
-        outputs=[state, audio_output],
+        outputs=[audio_output],
         api_name="api_clean_state"
     )
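With streaming removed, gr.Audio(type='numpy') now hands transcribe_and_respond the whole recording at once as a (sample_rate, samples) tuple when Submit is clicked. One thing the rewrite drops is the old handler's peak normalization; Gradio's numpy audio typically arrives as 16-bit integers, while the Whisper ASR pipeline works on float32 waveforms, so rescaling may still be worth keeping. A minimal sketch of preparing the input for pipe_asr, with the normalization carried over from the removed streaming code as an assumption (prepare_asr_input is a hypothetical helper, not part of the commit):

import numpy as np

def prepare_asr_input(audio):
    # `audio` is what gr.Audio(type='numpy') passes on submit:
    # a (sample_rate, samples) tuple, or None if nothing was recorded.
    if audio is None:
        return None
    sr, y = audio
    y = np.array(y).astype(np.float32)
    # Assumed step: peak-normalize to [-1, 1], as the removed
    # streaming handler did before calling Whisper.
    max_abs_y = np.max(np.abs(y))
    if max_abs_y > 0:
        y = y / max_abs_y
    return {"array": y, "sampling_rate": sr}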
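A design note on the new wiring: gr.Textbox(label="Transcription") is instantiated inline inside the outputs list, so the textbox is created and rendered at the point the click handler is wired up rather than declared with the other components. An equivalent sketch that declares it up front, which keeps the layout explicit (the name transcription_box and the audio output's label are assumptions, not taken from the commit):

import gradio as gr

with gr.Blocks(theme="rawrsor1/Everforest") as demo:
    with gr.Row():
        audio_input = gr.Audio(sources=["microphone"], type='numpy',
                               label="Speak to Ask")
        audio_output = gr.Audio(label="Audio Response", interactive=False)
    # Declared alongside the other components instead of inline in outputs.
    transcription_box = gr.Textbox(label="Transcription")

    gr.Button("Submit").click(
        fn=transcribe_and_respond,  # defined earlier in app.py
        inputs=audio_input,
        outputs=[audio_output, transcription_box],
    )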
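Note also that the commit deletes the three-value clear_transcription_state (it returned None, "", None for the old [state, audio_output] wiring) while the Clear State button still calls it, now against a single output. A one-output version compatible with the new outputs=[audio_output] would look like the sketch below; it is not part of this commit:

def clear_transcription_state():
    # Single return value to match outputs=[audio_output].
    return None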