Update app.py
app.py (changed)
@@ -10,6 +10,8 @@ from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from PyPDF2 import PdfReader
 from groq import Groq
+from streamlit_webrtc import webrtc_streamer, AudioProcessorBase, WebRtcMode
+import av
 
 # Clear ChromaDB cache to fix tenant issue
 chromadb.api.client.SharedSystemClient.clear_system_cache()
@@ -51,7 +53,7 @@ def chat_chain(vectorstore):
     )
     return chain
 
-#
+# Transcribe audio using Groq Whisper
 def transcribe_audio(file_path):
     """Transcribe audio using Groq's Whisper model."""
     with open(file_path, "rb") as file:
@@ -61,9 +63,13 @@ def transcribe_audio(file_path):
             response_format="json",
            language="en"
        )
-    # Access the text attribute
    return transcription.text
 
+# Audio Processor Class for Recording
+class AudioProcessor(AudioProcessorBase):
+    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
+        return frame
+
 # Streamlit UI
 st.title("Chat with PDFs via Audio 🎙️📄")
 
@@ -74,7 +80,7 @@ if uploaded_files:
     chain = chat_chain(vectorstore)
     st.success("PDFs processed! Ready to chat.")
 
-    input_method = st.radio("Choose Input Method", ["Text Input", "Audio File"])
+    input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"])
 
     # Text Input Mode
     if input_method == "Text Input":
@@ -84,8 +90,44 @@ if uploaded_files:
             response = chain({"question": query})["answer"]
             st.write(f"**Response:** {response}")
 
-    # Audio File Mode
-    elif input_method == "Audio File":
+    # Record Audio
+    elif input_method == "Record Audio":
+        st.write("Record your audio query:")
+        webrtc_ctx = webrtc_streamer(
+            key="record",
+            mode=WebRtcMode.SENDONLY,
+            audio_receiver_size=1024,
+            audio_processor_factory=AudioProcessor,
+            media_stream_constraints={"audio": True, "video": False},
+        )
+
+        if webrtc_ctx.audio_receiver:
+            st.write("Recording...")
+            audio_frames = []
+            while True:
+                frame = webrtc_ctx.audio_receiver.recv()
+                audio_frames.append(frame)
+                if len(audio_frames) > 5:  # Stop recording after a few frames
+                    break
+
+            # Save the recorded audio
+            audio_file_path = "recorded_audio.wav"
+            with av.open(audio_file_path, "w") as f:
+                for frame in audio_frames:
+                    f.write(frame)
+            st.success("Recording complete!")
+
+            # Transcribe and Generate Response
+            st.write("Transcribing audio...")
+            transcription = transcribe_audio(audio_file_path)
+            st.write(f"**You said:** {transcription}")
+
+            with st.spinner("Generating response..."):
+                response = chain({"question": transcription})["answer"]
+                st.write(f"**Response:** {response}")
+
+    # Upload Audio File Mode
+    elif input_method == "Upload Audio File":
         uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"])
         if uploaded_audio:
             audio_file_path = "uploaded_audio.wav"