Krishnavamshithumma committed · verified
Commit 630158a · 1 Parent(s): 4860ff8

Update app.py

Files changed (1):
  app.py  +133 -36
app.py CHANGED
@@ -1,7 +1,7 @@
 import gradio as gr
 from openai import OpenAI
 import speech_recognition as sr
-import tempfile
+import os  # Import os for managing temporary audio files
 
 system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
 - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -11,64 +11,161 @@ system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When r
 - A Computer Science graduate from Neil Gogte Institute of Technology
 Answer questions about your background professionally but engagingly (2-3 sentences max)."""
 
-def speech_to_text(audio):
-    recognizer = sr.Recognizer()
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
-        tmp_file.write(audio)
-        tmp_file.flush()
-        with sr.AudioFile(tmp_file.name) as source:
-            audio_data = recognizer.record(source)
-    try:
-        text = recognizer.recognize_google(audio_data)
-        return text
-    except sr.UnknownValueError:
-        return "❌ Could not understand the audio"
-    except sr.RequestError as e:
-        return f"❌ Speech recognition error: {e}"
-
-def chat_with_openai(user_input, history, api_key):
+def transcribe_audio(audio_file_path):
+    """Transcribes an audio file using the Google Web Speech API."""
+    if audio_file_path is None:
+        return ""  # Return empty string if no audio provided
+
+    r = sr.Recognizer()
+    try:
+        with sr.AudioFile(audio_file_path) as source:
+            audio_data = r.record(source)
+        text = r.recognize_google(audio_data)
+        return text
+    except sr.UnknownValueError:
+        return "Sorry, I could not understand the audio."
+    except sr.RequestError as e:
+        return f"Could not request results from Google Web Speech API service; {e}"
+    except Exception as e:
+        return f"An error occurred during transcription: {e}"
+    finally:
+        # Clean up the temporary audio file
+        if os.path.exists(audio_file_path):
+            os.remove(audio_file_path)
+
+def chat_with_openai(user_input_text, history, api_key):
+    """Handles chat with OpenAI, now accepting text input."""
     if not api_key:
         raise gr.Error("❌ Please enter your OpenAI API key.")
+    if not user_input_text:
+        return history, user_input_text  # Return current history if input is empty
+
     try:
         client = OpenAI(api_key=api_key)
+
         messages = [{"role": "system", "content": system_prompt}]
         for entry in history:
             messages.append({"role": "user", "content": entry[0]})
             messages.append({"role": "assistant", "content": entry[1]})
-        messages.append({"role": "user", "content": user_input})
+        messages.append({"role": "user", "content": user_input_text})
+
         response = client.chat.completions.create(
             model="gpt-4o",
             messages=messages,
             temperature=0.7
         )
+
         bot_reply = response.choices[0].message.content
-        history.append((user_input, bot_reply))
-        return history, ""
+        history.append((user_input_text, bot_reply))
+        return history, ""  # Clear the input text box after processing
     except Exception as e:
-        raise gr.Error(f"❌ Error: {str(e)}")
+        raise gr.Error(f"❌ Error during OpenAI chat: {str(e)}")
 
 with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
-    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant (No JavaScript)")
+    gr.Markdown("## 🎙️ Krishnavamshi Thumma - Voice Assistant")
 
-    api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password")
-    chatbot = gr.Chatbot(height=400)
+    # Add custom CSS - kept for styling, removed JS for mic handling
+    gr.HTML("""
+    <style>
+        #chatBox {
+            height: 60vh;
+            overflow-y: auto;
+            padding: 20px;
+            border-radius: 10px;
+            background: #f9f9f9;
+            margin-bottom: 20px;
+        }
+        .message {
+            margin: 10px 0;
+            padding: 12px;
+            border-radius: 8px;
+        }
+        .user {
+            background: #e3f2fd;
+            text-align: right;
+        }
+        .bot {
+            background: #f5f5f5;
+        }
+        .key-status {
+            padding: 5px;
+            margin-top: 5px;
+            border-radius: 4px;
+        }
+        .success {
+            background: #d4edda;
+            color: #155724;
+        }
+        .error {
+            background: #f8d7da;
+            color: #721c24;
+        }
+        /* Style for the Gradio Audio component, if needed */
+        .gradio-audio-recorder {
+            margin-top: 20px; /* Add some space above the recorder */
+        }
+    </style>
+    """)
+
+    api_key = gr.Textbox(label="🔐 OpenAI API Key", type="password", elem_id="apiKeyInput")
+    key_status = gr.HTML("<div id='keyStatus'></div>")
+    chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
     state = gr.State([])
 
-    with gr.Row():
-        voice_input = gr.Audio(source="microphone", type="filepath", label="🎤 Speak here")
-        transcribed_text = gr.Textbox(label="Transcribed Text")
-
-    # When audio is submitted, convert to text
-    voice_input.change(speech_to_text, voice_input, transcribed_text)
-
-    # When transcribed text is ready, send to OpenAI
-    transcribed_text.submit(
+    # New Gradio Audio component for recording
+    # sources=["microphone"] makes it record directly from the mic
+    # type="filepath" means it will pass a path to a temporary audio file to the function
+    audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your question", elem_id="audioInput")
+
+    # This Textbox will display the transcribed text and serve as input to chat_with_openai
+    transcribed_text_output = gr.Textbox(label="Transcribed Text", interactive=False)
+    # Adding a hidden submit button if needed, or link directly to the audio_input
+
+    clear_btn = gr.Button("🗑️ Clear Chat")
+
+    # Event flow:
+    # 1. User records audio using audio_input.
+    # 2. When recording stops, audio_input triggers its change event.
+    # 3. transcribe_audio is called with the audio file path.
+    # 4. The transcribed text updates transcribed_text_output.
+    # 5. The submit event of transcribed_text_output (implicitly when its value changes)
+    #    or an explicit submit button would then trigger chat_with_openai.
+    # For simplicity, let's make it so that when audio is transcribed, it directly chats.
+
+    audio_input.change(
+        transcribe_audio,
+        inputs=[audio_input],
+        outputs=[transcribed_text_output]
+    )
+
+    # Now, when transcribed_text_output changes (i.e., a new transcription is available),
+    # we want to send it to chat_with_openai.
+    transcribed_text_output.submit(
         chat_with_openai,
-        [transcribed_text, state, api_key],
-        [chatbot, state, transcribed_text]
+        inputs=[transcribed_text_output, state, api_key],
+        outputs=[chatbot, state, transcribed_text_output]  # Clear transcribed_text_output after chat
     )
 
-    clear_btn = gr.Button("🗑️ Clear Chat")
-    clear_btn.click(lambda: ([], []), None, [chatbot, state])
-
-demo.launch()
+    # JavaScript for API key input and status (still useful for UI)
+    gr.HTML("""
+    <script>
+        document.getElementById("apiKeyInput").addEventListener("input", function() {
+            const apiKey = this.value.trim();
+            const keyStatus = document.getElementById("keyStatus");
+
+            if (apiKey) {
+                keyStatus.innerHTML = '<div class="key-status success">API Key saved successfully!</div>';
+            } else {
+                keyStatus.innerHTML = '<div class="key-status error">Please enter a valid API key</div>';
+            }
+        });
+
+        // Initial setup for key status
+        document.querySelector("#apiKeyInput input").focus();
+    </script>
+    """)
+
+    clear_btn.click(lambda: ([], "", None), None, [chatbot, state, transcribed_text_output])  # Clear audio input too
+
+demo.launch()
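
A note on the event wiring above: transcribed_text_output is created with interactive=False, and Gradio's Textbox.submit event only fires when a user presses Enter inside the box, so the transcription produced by audio_input.change never reaches chat_with_openai on its own. A minimal sketch of the direct chaining the in-diff comments describe, using the .then() chaining that Gradio event listeners support (component names as in the diff):

    # Sketch: chain transcription straight into the chat call with .then(),
    # so the user never has to submit the read-only textbox manually.
    audio_input.change(
        transcribe_audio,
        inputs=[audio_input],
        outputs=[transcribed_text_output],
    ).then(
        chat_with_openai,
        inputs=[transcribed_text_output, state, api_key],
        outputs=[chatbot, state, transcribed_text_output],
    )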
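Separately, the new Chatbot is declared with type="messages", which renders a list of {"role": ..., "content": ...} dicts, while chat_with_openai still appends (user, bot) tuples to history. One way to bridge the two, sketched with a hypothetical helper that is not part of this commit:

    def history_to_messages(history):
        # Convert (user, bot) tuples into the dict format a type="messages" Chatbot renders.
        messages = []
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        return messages

    # chat_with_openai would then end with:
    #     return history_to_messages(history), ""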
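The clear handler returns ([], "", None) for [chatbot, state, transcribed_text_output], which resets state to a string rather than a list, and despite its trailing comment the audio recorder is not among the outputs. A sketch that clears all four components (same component names as above):

    clear_btn.click(
        lambda: ([], [], "", None),  # chatbot, state, transcribed text, audio recorder
        None,
        [chatbot, state, transcribed_text_output, audio_input],
    )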
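Finally, recent Gradio versions generally do not execute script tags embedded in gr.HTML, so the key-status script may never run. If the focus and status behavior matters, one alternative (a sketch, assuming the Gradio 4.x head parameter on gr.Blocks) is to inject the script into the page head instead:

    head_html = """
    <script>
        // Runs on page load; the selector matches elem_id="apiKeyInput" above.
        window.addEventListener("load", () => {
            const box = document.querySelector("#apiKeyInput input");
            if (box) box.focus();
        });
    </script>
    """

    with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma", head=head_html) as demo:
        ...  # same layout as in the diff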