Pijush2023 committed on
Commit
739e317
·
verified ·
1 Parent(s): 49d592e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -22
app.py CHANGED
@@ -210,31 +210,41 @@ pipe_asr = pipeline(
210
  return_timestamps=False
211
  )
212
 
213
- # Function to process audio in real-time and update the transcription
214
- def transcribe_audio_real_time(audio):
215
  try:
216
- sr, y = audio
217
- y = y.astype(np.float32)
218
- y = y / np.max(np.abs(y)) # Normalize audio to range [-1.0, 1.0]
219
-
220
- # Process the audio data with Whisper ASR in chunks
221
- result = pipe_asr({"array": y, "sampling_rate": sr}, return_timestamps=False)
222
- transcription = result.get("text", "")
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
- logging.debug(f"Real-time transcription: {transcription}")
225
- return transcription
226
- except Exception as e:
227
- logging.error(f"Error during real-time transcription: {e}")
228
- return "Error processing the audio, please try again."
229
 
 
 
230
 
231
- # Function to clear the transcription state
232
- def clear_state():
233
- return ""
234
 
235
  # Define the Gradio interface
236
  with gr.Blocks(theme="rawrsor1/Everforest") as demo:
237
- audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=False, label="Speak to Ask")
238
  transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
239
  submit_voice_btn = gr.Button("Submit Voice")
240
  clear_state_btn = gr.Button("Clear State")
@@ -242,12 +252,12 @@ with gr.Blocks(theme="rawrsor1/Everforest") as demo:
242
 
243
  # Update the transcription text in real-time as the user speaks
244
  audio_input.stream(
245
- fn=transcribe_audio_real_time,
246
- inputs=audio_input,
247
- outputs=transcription_textbox
248
  )
249
 
250
- # Define a placeholder function for handling submission
251
  def handle_submit(text):
252
  # Placeholder function, could trigger response generation or other actions
253
  return f"You submitted: {text}"
 
210
  return_timestamps=False
211
  )
212
 
213
+ # Function to handle audio transcription in real-time
214
+ def transcribe_function(stream, new_chunk):
215
  try:
216
+ sr, y = new_chunk[0], new_chunk[1]
217
+ except TypeError:
218
+ print(f"Error chunk structure: {type(new_chunk)}, content: {new_chunk}")
219
+ return stream, "", None
220
+
221
+ # Ensure y is not empty and is at least 1-dimensional
222
+ if y is None or len(y) == 0:
223
+ return stream, "", None
224
+
225
+ y = y.astype(np.float32)
226
+ max_abs_y = np.max(np.abs(y))
227
+ if max_abs_y > 0:
228
+ y = y / max_abs_y
229
+
230
+ # Ensure stream is also at least 1-dimensional before concatenation
231
+ if stream is not None and len(stream) > 0:
232
+ stream = np.concatenate([stream, y])
233
+ else:
234
+ stream = y
235
 
236
+ # Process the audio data for transcription
237
+ result = pipe_asr({"array": stream, "sampling_rate": sr}, return_timestamps=False)
238
+ full_text = result.get("text", "")
 
 
239
 
240
+ # Start a thread to reset the state after 10 seconds
241
+ threading.Thread(target=auto_reset_state).start()
242
 
243
+ return stream, full_text, full_text
 
 
244
 
245
  # Define the Gradio interface
246
  with gr.Blocks(theme="rawrsor1/Everforest") as demo:
247
+ audio_input = gr.Audio(sources=["microphone"], type='numpy', streaming=True, label="Speak to Ask")
248
  transcription_textbox = gr.Textbox(label="Transcription", interactive=False)
249
  submit_voice_btn = gr.Button("Submit Voice")
250
  clear_state_btn = gr.Button("Clear State")
 
252
 
253
  # Update the transcription text in real-time as the user speaks
254
  audio_input.stream(
255
+ fn=transcribe_function,
256
+ inputs=[None, audio_input],
257
+ outputs=[None, transcription_textbox, transcription_textbox]
258
  )
259
 
260
+ # Placeholder function for handling submission
261
  def handle_submit(text):
262
  # Placeholder function, could trigger response generation or other actions
263
  return f"You submitted: {text}"