Krishnavamshithumma committed
Commit 59110a2 · verified · 1 Parent(s): 9cd9066

Update app.py

Files changed (1): app.py (+47 -30)
app.py CHANGED
@@ -2,6 +2,10 @@ import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
 import os
+import io  # For in-memory file handling
+import scipy.io.wavfile as wavfile  # For writing WAV data to in-memory file
+import numpy as np  # To handle the audio array
+import datetime  # For logging timestamps if needed (not directly used in this version, but good practice)
 
 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -14,23 +18,35 @@ Answer questions about your background professionally but engagingly (2-3 senten
 # Initialize the SpeechRecognition Recognizer
 r = sr.Recognizer()
 
-def transcribe_audio_and_chat(audio_filepath, history, api_key):
+# Modified function to accept audio as a numpy array and samplerate
+def transcribe_audio_and_chat(audio_tuple, history, api_key):
     # 'history' now directly contains the list of message dictionaries
-    # that we can directly pass to OpenAI, after adding the current turn.
-
     if not api_key:
         raise gr.Error("❌ Please enter your OpenAI API key.")
 
-    # Always ensure history is a list, even if it somehow became None
+    if audio_tuple is None:
+        # If no audio is received, add an assistant message to history and reset the audio input
+        history.append({"role": "assistant", "content": "No audio received. Please speak into the microphone."})
+        return history, history, None
+
+    # Ensure history is a list, even if it somehow became None (defensive programming)
     if history is None:
         history = []
 
-    if audio_filepath is None:
-        raise gr.Error("No audio received. Please speak into the microphone.")
+    samplerate, audio_np_array = audio_tuple
 
     try:
-        # Load the audio file
-        with sr.AudioFile(audio_filepath) as source:
+        # Convert the NumPy array to a format speech_recognition can handle (in-memory WAV)
+        # Ensure the array is int16, a common sample format expected by scipy.io.wavfile
+        if audio_np_array.dtype != np.int16:
+            audio_np_array = audio_np_array.astype(np.int16)
+
+        wav_byte_io = io.BytesIO()
+        wavfile.write(wav_byte_io, samplerate, audio_np_array)
+        wav_byte_io.seek(0)  # Rewind to the beginning of the BytesIO object
+
+        # Create an AudioFile object from the in-memory WAV data
+        with sr.AudioFile(wav_byte_io) as source:
             audio_data = r.record(source)  # read the entire audio file
 
         # Perform speech recognition
@@ -39,20 +55,11 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
         print(f"Transcribed User Input: {user_input}")  # For debugging purposes
 
     except sr.UnknownValueError:
-        # If speech is unintelligible, add an assistant message to history
         history.append({"role": "assistant", "content": "Sorry, I could not understand the audio. Please try again."})
-        # Return history for chatbot, and None for audio input
-        return history, None
+        return history, history, None  # Reset audio input after error
     except sr.RequestError as e:
-        # If the API request fails, add an assistant message to history
         history.append({"role": "assistant", "content": f"Could not request results from Google Speech Recognition service; {e}"})
-        # Return history for chatbot, and None for audio input
-        return history, None
-
-    finally:
-        # Always clean up the temporary audio file
-        if os.path.exists(audio_filepath):
-            os.remove(audio_filepath)
+        return history, history, None  # Reset audio input after error
 
     # --- Proceed with OpenAI chat ---
     client = OpenAI(api_key=api_key)
@@ -76,15 +83,18 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
         history.append({"role": "user", "content": user_input})
         history.append({"role": "assistant", "content": bot_reply})
 
-        # Return the updated history for the chatbot component (state),
-        # and None for the audio input to clear it.
-        return history, None  # CORRECT: Return history for 'state' output, None for audio input
+        # Return the updated history for the chatbot component,
+        # history again for the 'state' component,
+        # and None for the audio input to clear it and make it ready for the next input.
+        return history, history, None
 
     except Exception as e:
-        print(f"An unexpected error occurred: {e}")  # Log the error
+        print(f"An unexpected error occurred: {e}")  # Log the error for debugging
+        # If an unexpected error occurs, still try to reset the audio input
        raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
 
-# --- Gradio UI setup (no changes needed here) ---
+
+# --- Gradio UI setup ---
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
 
@@ -132,25 +142,31 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
     key_status = gr.HTML("<div id='keyStatus'></div>")
 
+    # Chatbot component to display messages
     chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
-    state = gr.State([])  # `state` will hold OpenAI-compatible messages
+    # State component to maintain chat history in OpenAI's message format
+    state = gr.State([])
 
+    # Audio input component for microphone recording
     audio_input = gr.Audio(
         sources=["microphone"],
-        type="filepath",
+        type="numpy",  # Receive audio as (samplerate, numpy_array)
         label="Speak your message here",
         elem_id="audioInputComponent",
-        streaming=False
+        streaming=False  # Process audio after full recording
     )
 
     clear_btn = gr.Button("🗑️ Clear Chat")
 
+    # Event handler: fires when audio input is recorded and submitted (by stopping recording)
     audio_input.change(
-        transcribe_audio_and_chat,
+        fn=transcribe_audio_and_chat,
         inputs=[audio_input, state, api_key],
-        outputs=[chatbot, state]  # Ensure chatbot and state are updated
+        # Outputs: 1. chatbot display, 2. state (updated history), 3. audio_input (to clear it)
+        outputs=[chatbot, state, audio_input]
     )
 
+    # JavaScript for API key input and status display
     gr.HTML("""
     <script>
     document.getElementById("apiKeyInput").addEventListener("input", function() {
@@ -164,11 +180,12 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
         }
     });
 
+    // Focus on API key input on page load for convenience
     document.querySelector("#apiKeyInput input").focus();
     </script>
     """)
 
-    # When clearing, ensure state is reset to an empty list
+    # Clear button functionality: resets chatbot and state to empty
     clear_btn.click(lambda: ([], []), None, [chatbot, state])
 
demo.launch()
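
Note: the heart of this commit is swapping the temp-file path (type="filepath" plus the finally/os.remove cleanup) for an in-memory conversion of the (samplerate, numpy array) tuple that Gradio hands over. Below is a minimal, self-contained sketch of that conversion path, runnable outside the app; the helper name numpy_to_audio_data and the sine-wave test input are illustrative, not part of the commit.

# Standalone sketch of the new in-memory path (helper name is hypothetical).
import io

import numpy as np
import scipy.io.wavfile as wavfile
import speech_recognition as sr

def numpy_to_audio_data(samplerate, audio_np_array):
    """Turn Gradio's (samplerate, array) tuple into sr.AudioData without a temp file."""
    if audio_np_array.dtype != np.int16:
        audio_np_array = audio_np_array.astype(np.int16)  # PCM16, as scipy's WAV writer expects
    wav_byte_io = io.BytesIO()
    wavfile.write(wav_byte_io, samplerate, audio_np_array)  # write the WAV into memory
    wav_byte_io.seek(0)  # rewind so sr.AudioFile reads from the start
    with sr.AudioFile(wav_byte_io) as source:  # sr.AudioFile accepts file-like objects
        return sr.Recognizer().record(source)  # capture the whole clip

# Quick check with one second of a 440 Hz tone instead of a real recording:
rate = 16000
tone = (32767 * np.sin(2 * np.pi * 440 * np.arange(rate) / rate)).astype(np.int16)
audio_data = numpy_to_audio_data(rate, tone)
print(type(audio_data))  # an sr.AudioData instance, ready for recognize_google()

Since nothing touches disk, the old finally block that deleted the temporary file becomes unnecessary, which is why it disappears in this diff.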
 
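The handler's contract with the UI changed too: it now returns three values to match outputs=[chatbot, state, audio_input], and the trailing None is what clears the microphone widget for the next recording. A stripped-down sketch of that wiring using the same message format (the on_audio handler here is hypothetical, not the app's code):

# Minimal three-output wiring demo (handler and labels are illustrative).
import gradio as gr

def on_audio(audio_tuple, history):
    history = history or []
    reply = "Got audio." if audio_tuple is not None else "No audio received."
    history.append({"role": "assistant", "content": reply})
    # (chatbot display, state, audio component): the trailing None resets
    # the microphone so it is ready for the next recording.
    return history, history, None

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages")
    state = gr.State([])
    mic = gr.Audio(sources=["microphone"], type="numpy")
    mic.change(on_audio, inputs=[mic, state], outputs=[chatbot, state, mic])

demo.launch()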
 
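The hunks skip the unchanged chat section (new-file lines 65-83), so only the `# --- Proceed with OpenAI chat ---` marker and the history.append calls are visible here. For orientation, a hedged sketch of what a chat call of this shape typically looks like with the OpenAI v1 client; the chat_reply helper, model name, and parameters are assumptions, not taken from the commit:

# Hedged sketch only: the real model and parameters are not shown in this diff.
from openai import OpenAI

def chat_reply(api_key, system_prompt, history, user_input):
    client = OpenAI(api_key=api_key)
    # Prepend the system prompt to the OpenAI-format history the app maintains.
    messages = [{"role": "system", "content": system_prompt}] + history
    messages.append({"role": "user", "content": user_input})
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # assumption; substitute whatever the app actually uses
        messages=messages,
    )
    return response.choices[0].message.content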