# ref: https://www.youtube.com/watch?v=3ZDVmzlM6Nc
import os
import queue

import av
import chromadb
import streamlit as st
from groq import Groq
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
from PyPDF2 import PdfReader
from streamlit_webrtc import AudioProcessorBase, WebRtcMode, webrtc_streamer

# Clear ChromaDB's system cache to work around the "tenant" error on reruns
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Ensure required environment variables are set
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Please configure it in environment variables.")
    st.stop()

# Initialize the Groq client (for transcription) and the chat LLM
groq_client = Groq(api_key=GROQ_API_KEY)
llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)


# Process uploaded PDFs and build the vector store
def process_and_store_pdfs(uploaded_files):
    texts = []
    for uploaded_file in uploaded_files:
        reader = PdfReader(uploaded_file)
        for page in reader.pages:
            # extract_text() can return None or "" for image-only pages;
            # skip those so Chroma doesn't choke on empty documents
            text = page.extract_text()
            if text and text.strip():
                texts.append(text)
    embeddings = HuggingFaceEmbeddings()
    vectorstore = Chroma.from_texts(texts, embedding=embeddings, persist_directory="vector_db_dir")
    return vectorstore


# Build the conversational retrieval chain
def chat_chain(vectorstore):
    retriever = vectorstore.as_retriever()
    memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
    chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        verbose=True,
        return_source_documents=True,
    )
    return chain


# Transcribe audio using Groq's hosted Whisper model
def transcribe_audio(file_path):
    """Transcribe audio using Groq's Whisper model."""
    with open(file_path, "rb") as file:
        transcription = groq_client.audio.transcriptions.create(
            file=(file_path, file.read()),
            model="distil-whisper-large-v3-en",
            response_format="json",
            language="en",
        )
    return transcription.text


# Pass-through audio processor; frames are collected from the receiver below
class AudioProcessor(AudioProcessorBase):
    def recv(self, frame: av.AudioFrame) -> av.AudioFrame:
        return frame


# Streamlit UI
st.title("Chat with PDFs via Speech/Text 🎙️📝📚")

uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])

if uploaded_files:
    # Build the chain once and keep it in session state so the conversation
    # memory survives Streamlit reruns instead of being recreated each time
    if "chain" not in st.session_state:
        vectorstore = process_and_store_pdfs(uploaded_files)
        st.session_state.chain = chat_chain(vectorstore)
    chain = st.session_state.chain
    st.success("PDFs processed! Ready to chat.")
Ready to chat.") input_method = st.radio("Choose Input Method", ["Text Input", "Record Audio", "Upload Audio File"]) # Text Input Mode if input_method == "Text Input": query = st.text_input("Ask your question:") if query: with st.spinner("Thinking..."): response = chain({"question": query})["answer"] st.write(f"**Response:** {response}") # Record Audio elif input_method == "Record Audio": st.write("Record your audio query:") webrtc_ctx = webrtc_streamer( key="record", mode=WebRtcMode.SENDONLY, audio_receiver_size=1024, audio_processor_factory=AudioProcessor, media_stream_constraints={"audio": True, "video": False}, ) if webrtc_ctx.audio_receiver: st.write("Recording...") audio_frames = [] while True: frame = webrtc_ctx.audio_receiver.recv() audio_frames.append(frame) if len(audio_frames) > 5: # Stop recording after a few frames break # Save the recorded audio audio_file_path = "recorded_audio.wav" with av.open(audio_file_path, "w") as f: for frame in audio_frames: f.write(frame) st.success("Recording complete!") # Transcribe and Generate Response st.write("Transcribing audio...") transcription = transcribe_audio(audio_file_path) st.write(f"**You said:** {transcription}") with st.spinner("Generating response..."): response = chain({"question": transcription})["answer"] st.write(f"**Response:** {response}") # Upload Audio File Mode elif input_method == "Upload Audio File": uploaded_audio = st.file_uploader("Upload an audio file (.wav, .mp3)", type=["wav", "mp3"]) if uploaded_audio: audio_file_path = "uploaded_audio.wav" with open(audio_file_path, "wb") as f: f.write(uploaded_audio.read()) st.audio(audio_file_path, format="audio/wav") st.write("Transcribing audio...") transcription = transcribe_audio(audio_file_path) st.write(f"**You said:** {transcription}") with st.spinner("Generating response..."): response = chain({"question": transcription})["answer"] st.write(f"**Response:** {response}") else: st.info("Please upload PDF files to start chatting.")