DreamStream-1 committed on
Commit 4214099 · verified · 1 Parent(s): 4d8736d

Update app.py

Files changed (1): app.py (+40 -50)
app.py CHANGED
@@ -8,6 +8,8 @@ import base64
 import json
 import time
 from dotenv import load_dotenv
+from gtts import gTTS
+import io
 
 # Load environment variables
 load_dotenv()
@@ -363,97 +365,85 @@ def process_question(question, history):
         history.append({"role": "assistant", "content": f"Error: {str(e)}"})
         return "", history, "", None
 
-def process_audio(audio_file, history):
+def synthesize_text(text):
+    try:
+        tts = gTTS(text)
+        fp = io.BytesIO()
+        tts.write_to_fp(fp)
+        fp.seek(0)
+        return fp.read()
+    except Exception as e:
+        return None
+
+def process_voice_note(audio_file, history):
     if audio_file is None:
-        return "Please record or upload an audio file.", history, "", None
+        return "Please record or upload an audio file.", history, "", None, None
     try:
         transcript = rag.transcribe_audio(audio_file)
         if not transcript or not str(transcript).strip():
             history.append({"role": "user", "content": "🎤 [No audio detected or transcription failed]"})
             history.append({"role": "assistant", "content": "Sorry, I couldn't understand the audio. Please try again."})
-            return "", history, "", None
+            return "", history, "", None, None
         if not rag.thread_id:
-            return "Please upload a document first.", history, "", None
+            return "Please upload a document first.", history, "", None, None
         response = rag.ask_question(transcript)
         history.append({"role": "user", "content": f"🎤 {transcript}"})
         history.append({"role": "assistant", "content": response})
-        return "", history, "", None
+        tts_audio = synthesize_text(response)
+        return "", history, "", None, tts_audio
     except Exception as e:
         history.append({"role": "user", "content": f"🎤 [Error transcribing audio: {str(e)}]"})
         history.append({"role": "assistant", "content": "It seems there was an error while transcribing audio due to a technical issue. If there's anything specific from the document or any other questions you have regarding the content, please let me know, and I can assist you with that information."})
-        return "", history, "", None
-
-def process_audio_base64(audio_base64, history):
-    if not audio_base64:
-        return "Please record an audio message first.", history
-    try:
-        # Convert base64 to audio file
-        audio_data = base64.b64decode(audio_base64.split(',')[1])
-        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
-            tmp.write(audio_data)
-            tmp.flush()
-            tmp_path = tmp.name
-
-        # Transcribe audio
-        with open(tmp_path, "rb") as audio_file:
-            transcript = openai.audio.transcriptions.create(
-                model="whisper-1",
-                file=audio_file,
-                language="en"
-            )
-        os.remove(tmp_path)
-
-        if not rag.thread_id:
-            return "Please upload a document first.", history
-
-        # Get response from assistant
-        response = rag.ask_question(transcript.text)
-        history.append((f"🎤 {transcript.text}", response))
-        return "", history
-    except Exception as e:
-        return "", history + [("Audio input", f"Error: {str(e)}")]
+        return "", history, "", None, None
 
 # Create Gradio interface with improved layout
 with gr.Blocks(css=custom_css, title="Document Q&A System") as demo:
     gr.Markdown("""
     # <span style='color:#1976D2;'>Document Q&A System</span>
-    <div style='text-align:center; color:#1976D2; margin-bottom:18px;'>Upload a document, ask questions, or use your voice!</div>
+    <div style='text-align:center; color:#1976D2; margin-bottom:18px;'>Upload a document, record your voice, and chat!</div>
     """)
 
     # Define shared components at the top
-    chatbot = gr.Chatbot(height=350, elem_classes="gradio-chatbot", label=None, type="messages")
+    chatbot = gr.Chatbot(height=400, elem_classes="gradio-chatbot", label=None, type="messages")
     file_output = gr.Textbox(label="Upload Status", interactive=False, elem_classes="textbox")
     question = gr.Textbox(label="Type your question and press Enter", placeholder="Ask a question about your document...", elem_classes="textbox")
     audio_input = gr.Audio(type="filepath", label="Record or Upload Audio", elem_classes="gradio-audio", visible=False)
     audio_status = gr.Textbox(label="Audio Status", interactive=False, elem_classes="status-text", visible=False)
+    tts_output = gr.Audio(label="Assistant Voice Reply", interactive=False, visible=False)
 
     with gr.Row():
-        # Sidebar on the left
-        with gr.Column(scale=1, min_width=280):
+        # Controls on the top left
+        with gr.Column(scale=1, min_width=320):
             with gr.Group(elem_classes="compact-box"):
-                gr.Markdown("<div class='section-title'>Document Q&A</div>")
+                gr.Markdown("<div class='section-title'>Upload & Voice</div>")
                 file_input = gr.File(label="Upload Document", file_types=[".pdf", ".txt", ".doc", ".docx"], file_count="single", type="binary", elem_classes="upload-btn")
+                mic_btn = gr.Button("🎤 Record Voice", elem_classes="audio-btn")
+                audio_input
+                send_voice_btn = gr.Button("Send Voice Note", elem_classes="send-btn", visible=False)
                 reset_btn = gr.Button("Reset Chat & Upload New Document", elem_classes="reset-btn")
+                file_output
                 file_input.change(process_file, file_input, file_output)
                 def reset_all():
                     rag.thread_id = None
-                    return "", [], "", None
-                reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input])
-                file_output
-        # Main chat area
+                    return "", [], "", None, None
+                reset_btn.click(reset_all, None, [file_output, chatbot, question, audio_input, tts_output])
+                def show_audio():
+                    return {audio_input: gr.update(visible=True), send_voice_btn: gr.update(visible=True)}
+                mic_btn.click(show_audio, None, [audio_input, send_voice_btn])
+                def hide_audio():
+                    return {audio_input: gr.update(visible=False), send_voice_btn: gr.update(visible=False)}
+                send_voice_btn.click(process_voice_note, [audio_input, chatbot], [file_output, chatbot, question, audio_input, tts_output])
+                send_voice_btn.click(hide_audio, None, [audio_input, send_voice_btn])
+                tts_output
+        # Chatbot/chat area to the right
         with gr.Column(scale=3, min_width=400):
             with gr.Group(elem_classes="compact-box"):
                 chatbot
                 with gr.Row():
                     question
                     send_btn = gr.Button("Send", elem_classes="send-btn")
-                    mic_btn = gr.Button("🎤", elem_classes="audio-btn")
                 send_btn.click(process_question, [question, chatbot], [question, chatbot, question, audio_input])
                 question.submit(process_question, [question, chatbot], [question, chatbot, question, audio_input])
-                audio_input.change(process_audio, [audio_input, chatbot], [audio_input, chatbot, question, audio_input])
-                def show_audio():
-                    return {audio_input: gr.update(visible=True), audio_status: gr.update(visible=True)}
-                mic_btn.click(show_audio, None, [audio_input, audio_status])
 
     # Add JavaScript for audio handling
     demo.load(
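Note on the TTS wiring (a reviewer sketch, not part of the commit): synthesize_text() returns raw MP3 bytes, while gr.Audio values are normally a filepath or a (sample_rate, numpy_array) tuple, so playback through tts_output may need a small shim. A minimal sketch, assuming the synthesize_text helper from this diff; the tts_bytes_to_filepath name is invented here:

import tempfile

def tts_bytes_to_filepath(mp3_bytes):
    # Persist gTTS's MP3 bytes to disk so gr.Audio can load them by path.
    # Returns None when synthesis failed upstream.
    if mp3_bytes is None:
        return None
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(mp3_bytes)
        return tmp.name

With this shim, process_voice_note could return tts_bytes_to_filepath(synthesize_text(response)) in place of the raw bytes.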
 
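Note on the event wiring (a reviewer sketch, not part of the commit): the two send_voice_btn.click() registrations in the diff are independent events, so hide_audio is not guaranteed to wait for process_voice_note to finish. A minimal sketch of the same wiring using .then() chaining, assuming a Gradio release that supports it and the components defined in this diff:

send_voice_btn.click(
    process_voice_note,
    [audio_input, chatbot],
    [file_output, chatbot, question, audio_input, tts_output],
).then(
    hide_audio,
    None,
    [audio_input, send_voice_btn],
)

Chaining keeps the recorder visible until the transcription round-trip completes, rather than hiding a widget that is still an input to a running event.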