"""Streamlit RAG app that answers questions over PDFs preloaded from Google Drive.

At import time the script downloads every PDF in ``PDF_LINKS``, splits the
extracted text into fixed-size character chunks, embeds the chunks with a
Sentence Transformer model and stores them in an in-memory FAISS index.
Each user question is embedded, the nearest chunks are retrieved, and the
chunks plus the question are sent to a Groq chat model.

The Groq API key is read from the ``GROQ_API_KEY`` environment variable
(optionally supplied through a ``.env`` file); it must never be committed
to source control.
"""

import os
from io import BytesIO

import faiss
import requests
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# Predefined Google Drive links
PDF_LINKS = [
    "https://drive.google.com/uc?id=1JPf0XvDhn8QoDOlZDrxCOpu4WzKFESNz",
    # Add more Google Drive links here
]

CHUNK_SIZE = 500  # Characters per text chunk
TOP_K = 3  # Number of chunks retrieved per query
REQUEST_TIMEOUT = 30  # Seconds to wait for a PDF download before giving up

# Initialize Groq client. The key comes from the environment / .env file so
# that no secret lives in the source code.
load_dotenv()
_api_key = os.getenv("GROQ_API_KEY")
if not _api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it in the environment or in a .env file."
    )
client = Groq(api_key=_api_key)

# Load Sentence Transformer model
# NOTE(review): Streamlit presumably re-executes this script on every
# interaction, which would repeat the model load and PDF indexing below;
# consider caching (e.g. st.cache_resource) -- confirm against the installed
# Streamlit version.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Initialize FAISS
dimension = 384  # Embedding size for the Sentence Transformer model
index = faiss.IndexFlatL2(dimension)

# Store chunks globally; position i holds the text of the i-th vector in `index`
stored_chunks = []


def download_and_process_pdf(link):
    """Download one PDF, chunk its text and add the chunks to the index.

    On success the chunk embeddings are appended to the module-level
    ``index`` and the chunk texts to ``stored_chunks`` (in the same order,
    so FAISS ids map directly to list positions). Download failures and
    PDFs without extractable text are reported with ``print`` and skipped.

    Args:
        link: URL of the PDF to fetch.

    Returns:
        None.
    """
    try:
        response = requests.get(link, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # A network failure on one link must not take the whole app down.
        print(f"Failed to download PDF from link: {link} ({exc})")
        return

    if response.status_code != 200:
        print(f"Failed to download PDF from link: {link}")
        return

    pdf_reader = PdfReader(BytesIO(response.content))
    # Guard against pages that yield no text, and join once instead of
    # growing a string page by page.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    if not text.strip():
        # Nothing to embed; adding an empty batch to the index would fail.
        print(f"No extractable text in PDF from link: {link}")
        return

    # Chunk into CHUNK_SIZE-character blocks
    chunks = [text[i:i + CHUNK_SIZE] for i in range(0, len(text), CHUNK_SIZE)]
    embeddings = model.encode(chunks)
    index.add(embeddings)
    stored_chunks.extend(chunks)


# Process all predefined links
for link in PDF_LINKS:
    download_and_process_pdf(link)


def query_model(query):
    """Answer ``query`` using the most similar indexed chunks as context.

    Args:
        query: The user's question.

    Returns:
        The text of the Groq model's reply, or a fixed notice when no
        document content has been indexed.
    """
    if index.ntotal == 0:
        return "No document content is available to answer the question."

    query_vector = model.encode([query])
    # Never ask for more neighbours than there are vectors: FAISS pads
    # missing results with -1, which would index stored_chunks from the end.
    k = min(TOP_K, index.ntotal)
    _, indices = index.search(query_vector, k)
    response_chunks = [stored_chunks[idx] for idx in indices[0] if idx >= 0]
    context = " ".join(response_chunks)

    # Groq API call
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"Context: {context}\n\nQuery: {query}",
            }
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


# Streamlit app
st.title("RAG-based PDF Question Answering")
st.write("Preloaded documents from Google Drive are ready for querying.")

query = st.text_input("Ask a question:")

if query:
    answer = query_model(query)
    st.write("### Answer:")
    st.write(answer)