Update app.py
app.py CHANGED
@@ -203,49 +203,30 @@ pipe_asr = pipeline(
     tokenizer=processor.tokenizer,
     feature_extractor=processor.feature_extractor,
     max_new_tokens=128,
-    chunk_length_s=
+    chunk_length_s=5,  # Process audio in 5-second chunks
     batch_size=16,
     torch_dtype=torch_dtype,
     device=device,
-    return_timestamps=
+    return_timestamps=False
 )
 
-#
-
-
-# Function to handle voice input, generate response from Neo4j, and return audio output
-def handle_voice_to_voice(audio):
+# Function to process audio in real-time and update the transcription
+def transcribe_audio_real_time(audio):
     try:
-        # Transcribe audio input to text
         sr, y = audio
         y = y.astype(np.float32)
         y = y / np.max(np.abs(y))  # Normalize audio to range [-1.0, 1.0]
 
-
-        logging.debug(f"Audio data: {y[:100]}")  # Log first 100 samples for brevity
-
-        # Process the audio data with Whisper ASR
+        # Process the audio data with Whisper ASR in chunks
         result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
-
-
-        logging.debug(f"Transcribed question: {question}")
-
-        if not question:
-            return "No transcription available, please try again.", ""
+        transcription = result.get("text", "")
 
-
-
-        logging.debug(f"Response from Neo4j and GPT: {response}")
-
-        # Generate audio from the response
-        audio_path = generate_audio_elevenlabs(response)
-        logging.debug(f"Generated audio path: {audio_path}")
-
-        # Return the transcription and the audio path
-        return audio_path, question
+        logging.debug(f"Real-time transcription: {transcription}")
+        return transcription
     except Exception as e:
-        logging.error(f"Error
-        return "Error processing the audio, please try again."
+        logging.error(f"Error during real-time transcription: {e}")
+        return "Error processing the audio, please try again."
+
 
 # Function to clear the transcription state
 def clear_state():
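For context, the hunk above shows only the tail of the `pipe_asr = pipeline(` call named in its header. Below is a minimal sketch of the Whisper setup it implies; the checkpoint name and the model/processor loading lines are assumptions (the commit does not show them), while the pipeline arguments mirror the post-commit configuration:

    import torch
    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

    # Assumed checkpoint: the commit does not show which Whisper model is loaded.
    model_id = "openai/whisper-large-v3"

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    ).to(device)
    processor = AutoProcessor.from_pretrained(model_id)

    pipe_asr = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=5,        # post-commit value: 5-second windows
        batch_size=16,
        torch_dtype=torch_dtype,
        device=device,
        return_timestamps=False  # post-commit value: plain text, no timestamps
    )

Presumably `chunk_length_s=5` is chosen to match the cadence of streamed microphone snippets, trading some accuracy on long sentences for a shorter delay between speech and the partial transcript.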
@@ -254,21 +235,33 @@ def clear_state():
 # Define the Gradio interface
 with gr.Blocks(theme="rawrsor1/Everforest") as demo:
     audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True, label="Speak to Ask")
+    transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
     submit_voice_btn = gr.Button("Submit Voice")
     clear_state_btn = gr.Button("Clear State")
-    transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
     audio_output = gr.Audio(label="Response Audio", type="filepath", autoplay=True, interactive=False)
 
-    #
-
-        fn=
+    # Update the transcription text in real-time as the user speaks
+    audio_input.stream(
+        fn=transcribe_audio_real_time,
         inputs=audio_input,
-        outputs=
+        outputs=transcription_textbox
+    )
+
+    # Define a placeholder function for handling submission
+    def handle_submit(text):
+        # Placeholder function, could trigger response generation or other actions
+        return f"You submitted: {text}"
+
+    # Handle the submission of the final transcribed text
+    submit_voice_btn.click(
+        fn=handle_submit,
+        inputs=transcription_textbox,
+        outputs=transcription_textbox
     )
 
     # Interaction for Clear State Button
     clear_state_btn.click(
-        fn=
+        fn=lambda: "",
         outputs=transcription_textbox
     )
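One caveat with the new wiring: with `streaming=True`, Gradio fires the `stream` event repeatedly, passing each fresh microphone chunk to `transcribe_audio_real_time`, so the function as committed transcribes only the latest chunk rather than the utterance so far. The usual pattern is to carry the accumulated waveform in a `gr.State`; here is a minimal sketch under that assumption (`transcribe_accumulated` and `stream_state` are illustrative, not part of this commit, and `pipe_asr` is the pipeline configured above):

    import numpy as np
    import gradio as gr

    def transcribe_accumulated(state, new_chunk):
        # Accumulate streamed chunks and transcribe the whole utterance so far.
        sr, y = new_chunk
        y = y.astype(np.float32)
        y = y / (np.max(np.abs(y)) + 1e-8)  # guard against all-zero (silent) chunks
        stream = y if state is None else np.concatenate([state, y])
        result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
        return stream, result.get("text", "")

    with gr.Blocks(theme="rawrsor1/Everforest") as demo:
        audio_input = gr.Audio(sources=["microphone"], type="numpy", streaming=True, label="Speak to Ask")
        transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
        stream_state = gr.State(None)  # hypothetical accumulator, not in the commit

        audio_input.stream(
            fn=transcribe_accumulated,
            inputs=[stream_state, audio_input],
            outputs=[stream_state, transcription_textbox],
        )

The `+ 1e-8` term also avoids a latent division by zero in the committed normalization (`y / np.max(np.abs(y))` fails on an all-zero chunk). If the accumulator is used, the Clear State button should reset it as well, e.g. `fn=lambda: ("", None)` with `outputs=[transcription_textbox, stream_state]`. Note too that the commit replaces the old Neo4j/ElevenLabs voice-to-voice handler with the placeholder `handle_submit`, so `generate_audio_elevenlabs` is no longer called anywhere; restoring spoken answers would mean wiring response generation back into `submit_voice_btn.click` with `audio_output` among its outputs.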