Krishnavamshithumma committed on
Commit
3b4c90e
·
verified ·
1 Parent(s): 9fc3d39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -133
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  from openai import OpenAI
3
  import speech_recognition as sr
4
- import os # Import os for managing temporary audio files
5
 
6
  system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
7
  - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
@@ -11,161 +11,64 @@ system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When r
11
  - A Computer Science graduate from Neil Gogte Institute of Technology
12
  Answer questions about your background professionally but engagingly (2-3 sentences max)."""
13
 
14
- def transcribe_audio(audio_file_path):
15
- """Transcribes an audio file using Google Web Speech API."""
16
- if audio_file_path is None:
17
- return "" # Return empty string if no audio provided
18
-
19
- r = sr.Recognizer()
20
- try:
21
- with sr.AudioFile(audio_file_path) as source:
22
- audio_data = r.record(source)
23
- text = r.recognize_google(audio_data)
24
- return text
25
- except sr.UnknownValueError:
26
- return "Sorry, I could not understand the audio."
27
- except sr.RequestError as e:
28
- return f"Could not request results from Google Web Speech API service; {e}"
29
- except Exception as e:
30
- return f"An error occurred during transcription: {e}"
31
- finally:
32
- # Clean up the temporary audio file
33
- if os.path.exists(audio_file_path):
34
- os.remove(audio_file_path)
35
-
36
- def chat_with_openai(user_input_text, history, api_key):
37
- """Handles chat with OpenAI, now accepting text input."""
38
  if not api_key:
39
  raise gr.Error("❌ Please enter your OpenAI API key.")
40
- if not user_input_text:
41
- return history, user_input_text # Return current history if input is empty
42
-
43
  try:
44
  client = OpenAI(api_key=api_key)
45
-
46
  messages = [{"role": "system", "content": system_prompt}]
47
  for entry in history:
48
  messages.append({"role": "user", "content": entry[0]})
49
  messages.append({"role": "assistant", "content": entry[1]})
50
- messages.append({"role": "user", "content": user_input_text})
51
-
52
  response = client.chat.completions.create(
53
  model="gpt-4o",
54
  messages=messages,
55
  temperature=0.7
56
  )
57
-
58
  bot_reply = response.choices[0].message.content
59
- history.append((user_input_text, bot_reply))
60
- return history, "" # Clear the input text box after processing
61
  except Exception as e:
62
- raise gr.Error(f"❌ Error during OpenAI chat: {str(e)}")
63
 
64
  with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
65
- gr.Markdown("## πŸŽ™οΈ Krishnavamshi Thumma - Voice Assistant")
66
 
67
- # Add custom CSS - kept for styling, removed JS for mic handling
68
- gr.HTML("""
69
- <style>
70
- #chatBox {
71
- height: 60vh;
72
- overflow-y: auto;
73
- padding: 20px;
74
- border-radius: 10px;
75
- background: #f9f9f9;
76
- margin-bottom: 20px;
77
- }
78
- .message {
79
- margin: 10px 0;
80
- padding: 12px;
81
- border-radius: 8px;
82
- }
83
- .user {
84
- background: #e3f2fd;
85
- text-align: right;
86
- }
87
- .bot {
88
- background: #f5f5f5;
89
- }
90
- .key-status {
91
- padding: 5px;
92
- margin-top: 5px;
93
- border-radius: 4px;
94
- }
95
- .success {
96
- background: #d4edda;
97
- color: #155724;
98
- }
99
- .error {
100
- background: #f8d7da;
101
- color: #721c24;
102
- }
103
- /* Style for the Gradio Audio component, if needed */
104
- .gradio-audio-recorder {
105
- margin-top: 20px; /* Add some space above the recorder */
106
- }
107
- </style>
108
- """)
109
-
110
- api_key = gr.Textbox(label="πŸ” OpenAI API Key", type="password", elem_id="apiKeyInput")
111
- key_status = gr.HTML("<div id='keyStatus'></div>")
112
- chatbot = gr.Chatbot(elem_id="chatBox", type="messages", height=400)
113
  state = gr.State([])
114
 
115
- # New Gradio Audio component for recording
116
- # source="microphone" makes it record directly from mic
117
- # type="filepath" means it will pass a path to a temporary audio file to the function
118
- audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record your question", elem_id="audioInput")
119
 
120
- # This Textbox will display the transcribed text and serve as input to chat_with_openai
121
- transcribed_text_output = gr.Textbox(label="Transcribed Text", interactive=False)
122
- # Adding a hidden submit button if needed, or link directly to the audio_input
123
 
124
- clear_btn = gr.Button("πŸ—‘οΈ Clear Chat")
125
-
126
- # Event flow:
127
- # 1. User records audio using audio_input.
128
- # 2. When recording stops, audio_input triggers its change event.
129
- # 3. transcribe_audio is called with the audio file path.
130
- # 4. The transcribed text updates transcribed_text_output.
131
- # 5. The submit event of transcribed_text_output (implicitly when its value changes)
132
- # or an explicit submit button would then trigger chat_with_openai.
133
- # For simplicity, let's make it so that when audio is transcribed, it directly chats.
134
-
135
- audio_input.change(
136
- transcribe_audio,
137
- inputs=[audio_input],
138
- outputs=[transcribed_text_output]
139
- )
140
-
141
- # Now, when transcribed_text_output changes (i.e., new transcription is available),
142
- # we want to send it to chat_with_openai.
143
- transcribed_text_output.submit(
144
  chat_with_openai,
145
- inputs=[transcribed_text_output, state, api_key],
146
- outputs=[chatbot, state, transcribed_text_output] # Clear transcribed_text_output after chat
147
  )
148
 
 
 
149
 
150
- # JavaScript for API key input and status (still useful for UI)
151
- gr.HTML("""
152
- <script>
153
- document.getElementById("apiKeyInput").addEventListener("input", function() {
154
- const apiKey = this.value.trim();
155
- const keyStatus = document.getElementById("keyStatus");
156
-
157
- if (apiKey) {
158
- keyStatus.innerHTML = '<div class="key-status success">API Key saved successfully!</div>';
159
- } else {
160
- keyStatus.innerHTML = '<div class="key-status error">Please enter a valid API key</div>';
161
- }
162
- });
163
-
164
- // Initial setup for key status
165
- document.querySelector("#apiKeyInput input").focus();
166
- </script>
167
- """)
168
-
169
- clear_btn.click(lambda: ([], "", None), None, [chatbot, state, transcribed_text_output]) # Clear audio input too
170
-
171
- demo.launch()
 
import os
import tempfile

import gradio as gr
import speech_recognition as sr
from openai import OpenAI
5
 
6
  system_prompt = """You are a voice bot representing Krishnavamshi Thumma. When responding to questions, answer as if you are:
7
  - A Generative AI and Data Engineering enthusiast with 1.5+ years of experience
 
11
  - A Computer Science graduate from Neil Gogte Institute of Technology
12
  Answer questions about your background professionally but engagingly (2-3 sentences max)."""
13
 
14
def speech_to_text(audio):
    """Transcribe a recorded clip to text via the Google Web Speech API.

    Parameters:
        audio: Path to an audio file (str — what gr.Audio with
            type="filepath" actually delivers) or raw audio bytes.
            None (e.g. a cleared recording) yields "".

    Returns:
        The recognized text, or a "❌ ..."-prefixed message on failure.
    """
    if audio is None:
        # Recording was cleared / nothing captured — nothing to transcribe.
        return ""

    recognizer = sr.Recognizer()
    tmp_path = None
    try:
        if isinstance(audio, (bytes, bytearray)):
            # Raw bytes: persist to a temp WAV so sr.AudioFile can open it.
            # (Close the handle before reading it back, for portability.)
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
                tmp_file.write(audio)
                tmp_path = tmp_file.name
            audio_path = tmp_path
        else:
            # gr.Audio(type="filepath") passes a path string directly; the
            # original called tmp_file.write(audio) on it, which raises
            # TypeError for str input.
            audio_path = audio

        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        try:
            return recognizer.recognize_google(audio_data)
        except sr.UnknownValueError:
            return "❌ Could not understand the audio"
        except sr.RequestError as e:
            return f"❌ Speech recognition error: {e}"
    finally:
        # Remove only our own temp copy, never the caller's original file.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
28
+
29
def chat_with_openai(user_input, history, api_key):
    """Send the transcribed user text to OpenAI and append the reply.

    Parameters:
        user_input: The user's question text (from the transcription box).
        history: List of (user, bot) tuples accumulated so far (gr.State).
        api_key: OpenAI API key entered by the user.

    Returns:
        (history, history, ""): the updated conversation for the Chatbot
        display, the same list for the State, and an empty string to clear
        the transcription textbox. The .submit() wiring declares three
        outputs ([chatbot, state, transcribed_text]); the original returned
        only two values, which makes Gradio fail at runtime.

    Raises:
        gr.Error: When the key is missing or the API call fails.
    """
    if not api_key:
        raise gr.Error("❌ Please enter your OpenAI API key.")
    if not user_input:
        # Nothing to send (empty/failed transcription) — leave the chat as-is.
        return history, history, ""
    try:
        client = OpenAI(api_key=api_key)

        # Rebuild the full conversation for the chat-completions call.
        messages = [{"role": "system", "content": system_prompt}]
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": user_input})

        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=0.7
        )

        bot_reply = response.choices[0].message.content
        history.append((user_input, bot_reply))
        return history, history, ""
    except Exception as e:
        raise gr.Error(f"❌ Error: {str(e)}")
49
 
50
  with gr.Blocks(title="Voice Bot: Krishnavamshi Thumma") as demo:
51
+ gr.Markdown("## πŸŽ™οΈ Krishnavamshi Thumma - Voice Assistant (No JavaScript)")
52
 
53
+ api_key = gr.Textbox(label="πŸ” OpenAI API Key", type="password")
54
+ chatbot = gr.Chatbot(height=400)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  state = gr.State([])
56
 
57
+ with gr.Row():
58
+ voice_input = gr.Audio(sources=["microphone"], type="filepath", label="🎀 Speak here")
59
+ transcribed_text = gr.Textbox(label="Transcribed Text")
 
60
 
61
+ # When audio is submitted, convert to text
62
+ voice_input.change(speech_to_text, voice_input, transcribed_text)
 
63
 
64
+ # When transcribed text is ready, send to OpenAI
65
+ transcribed_text.submit(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  chat_with_openai,
67
+ [transcribed_text, state, api_key],
68
+ [chatbot, state, transcribed_text]
69
  )
70
 
71
+ clear_btn = gr.Button("πŸ—‘οΈ Clear Chat")
72
+ clear_btn.click(lambda: ([], []), None, [chatbot, state])
73
 
74
+ demo.launch()