Spaces:

Krishnavamshithumma
/

Voice-Bot-AI

Running

App Files Files Community

Krishnavamshithumma committed on Jun 16

Commit

1f0a91c

verified ·

1 Parent(s): b5d9400

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -35

app.py CHANGED Viewed

@@ -15,12 +15,13 @@ Answer questions about your background professionally but engagingly (2-3 senten
 r = sr.Recognizer()
 def transcribe_audio_and_chat(audio_filepath, history, api_key):
     if not api_key:
-        # Raise a Gradio error to be displayed in the UI
         raise gr.Error("❌ Please enter your OpenAI API key.")
     if audio_filepath is None:
-        # Raise a Gradio error if no audio is captured
         raise gr.Error("No audio received. Please speak into the microphone.")
     try:
@@ -32,62 +33,54 @@ def transcribe_audio_and_chat(audio_filepath, history, api_key):
         try:
             user_input = r.recognize_google(audio_data) # Using Google Web Speech API
             print(f"Transcribed User Input: {user_input}") # For debugging purposes
-            # If transcription is successful, you might want to show it in the chat
-            # before the AI responds. For now, we'll just use it directly.
         except sr.UnknownValueError:
-            # If speech is unintelligible
-            # Return current history and an error message for the chatbot
-            return history + [("", "Sorry, I could not understand the audio. Please try again.")], ""
         except sr.RequestError as e:
-            # If API request fails
-            # Return current history and an error message for the chatbot
-            return history + [("", f"Could not request results from Google Speech Recognition service; {e}")], ""
         finally:
-            # Always clean up the temporary audio file, regardless of success or failure
             if os.path.exists(audio_filepath):
                 os.remove(audio_filepath)
         # --- Proceed with OpenAI chat ---
         client = OpenAI(api_key=api_key)
-        # Build messages from history
-        messages = [{"role": "system", "content": system_prompt}]
-        for entry in history:
-            # Ensure history entries are tuples (user_message, bot_message)
-            if isinstance(entry, (list, tuple)) and len(entry) == 2:
-                messages.append({"role": "user", "content": entry[0]})
-                messages.append({"role": "assistant", "content": entry[1]})
-        messages.append({"role": "user", "content": user_input}) # Add the current user input
         # Get response from OpenAI
         response = client.chat.completions.create(
             model="gpt-4o",
-            messages=messages,
             temperature=0.7
         )
         bot_reply = response.choices[0].message.content
-        # Append the new user input and bot reply to the history
-        history.append((user_input, bot_reply))
-        # Return the updated history for the chatbot component
-        # and an empty string for the audio input, effectively clearing it for next input.
-        return history, None # Use None for the audio input to reset the component
     except Exception as e:
-        # Catch any other unexpected errors
         print(f"An unexpected error occurred: {e}") # Log the error
-        # Raise a Gradio error for display in the UI
         raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
-# --- Gradio UI setup remains mostly the same ---
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
-    # Add custom CSS
     gr.HTML("""
     <style>
         #chatBox {
@@ -110,7 +103,7 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
         .bot {
             background: #f5f5f5;
         }
-        #audioInputComponent { /* New ID for the audio component */
             margin-top: 20px;
         }
         .key-status {
@@ -131,8 +124,10 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
     key_status = gr.HTML("<div id='keyStatus'></div>")
     chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
-    state = gr.State([]) # Stores the chat history
     audio_input = gr.Audio(
         sources=["microphone"],
@@ -144,14 +139,12 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     clear_btn = gr.Button("🗑️ Clear Chat")
-    # Event handler: When audio input is recorded and submitted (by stopping recording)
     audio_input.change(
         transcribe_audio_and_chat,
         inputs=[audio_input, state, api_key],
-        outputs=[chatbot, state] # Ensure chatbot and state are updated
     )
-    # JavaScript for API key status (still useful for UX)
     gr.HTML("""
     <script>
         document.getElementById("apiKeyInput").addEventListener("input", function() {
@@ -165,11 +158,11 @@ with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
             }
         });
-        // Focus on API key input on load
         document.querySelector("#apiKeyInput input").focus();
     </script>
     """)
     clear_btn.click(lambda: ([], []), None, [chatbot, state])
 demo.launch()

 r = sr.Recognizer()
 def transcribe_audio_and_chat(audio_filepath, history, api_key):
+    # 'history' now directly contains the list of message dictionaries
+    # that we can directly pass to OpenAI, after adding current turn.
     if not api_key:
         raise gr.Error("❌ Please enter your OpenAI API key.")
     if audio_filepath is None:
         raise gr.Error("No audio received. Please speak into the microphone.")
     try:
         try:
             user_input = r.recognize_google(audio_data) # Using Google Web Speech API
             print(f"Transcribed User Input: {user_input}") # For debugging purposes
         except sr.UnknownValueError:
+            # If speech is unintelligible, add an assistant message to history
+            history.append({"role": "assistant", "content": "Sorry, I could not understand the audio. Please try again."})
+            return history, None
         except sr.RequestError as e:
+            # If API request fails, add an assistant message to history
+            history.append({"role": "assistant", "content": f"Could not request results from Google Speech Recognition service; {e}"})
+            return history, None
         finally:
+            # Always clean up the temporary audio file
             if os.path.exists(audio_filepath):
                 os.remove(audio_filepath)
         # --- Proceed with OpenAI chat ---
         client = OpenAI(api_key=api_key)
+        # Create the full messages list for OpenAI, starting with the system prompt
+        # and then appending the current chat history, followed by the new user input.
+        messages_for_openai = [{"role": "system", "content": system_prompt}] + history
+        messages_for_openai.append({"role": "user", "content": user_input})
         # Get response from OpenAI
         response = client.chat.completions.create(
             model="gpt-4o",
+            messages=messages_for_openai, # Pass the correctly formatted messages
             temperature=0.7
         )
         bot_reply = response.choices[0].message.content
+        # Append both the user input and bot reply to the *Gradio* history (state)
+        # in the 'messages' format that Gradio's chatbot expects.
+        history.append({"role": "user", "content": user_input})
+        history.append({"role": "assistant", "content": bot_reply})
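+        # Return the updated history for the chatbot component.
+        # NOTE(review): the handler is wired with outputs=[chatbot, state], so the second value (None) is assigned to `state`, not to the audio input — confirm this is intended, since later calls use `history` as a list.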
+        return history, None
     except Exception as e:
         print(f"An unexpected error occurred: {e}") # Log the error
         raise gr.Error(f"❌ An unexpected error occurred: {str(e)}")
+# --- Gradio UI setup ---
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
     gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
     gr.HTML("""
     <style>
         #chatBox {
         .bot {
             background: #f5f5f5;
         }
+        #audioInputComponent {
             margin-top: 20px;
         }
         .key-status {
     api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
     key_status = gr.HTML("<div id='keyStatus'></div>")
+    # Crucially, set type="messages" here to match OpenAI's expected format
     chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
+    state = gr.State([]) # Now `state` will directly hold OpenAI-compatible messages
     audio_input = gr.Audio(
         sources=["microphone"],
     clear_btn = gr.Button("🗑️ Clear Chat")
     audio_input.change(
         transcribe_audio_and_chat,
         inputs=[audio_input, state, api_key],
+        outputs=[chatbot, state]
     )
     gr.HTML("""
     <script>
         document.getElementById("apiKeyInput").addEventListener("input", function() {
             }
         });
         document.querySelector("#apiKeyInput input").focus();
     </script>
     """)
+    # When clearing, ensure state is reset to an empty list, matching the 'messages' format
     clear_btn.click(lambda: ([], []), None, [chatbot, state])
 demo.launch()