"""RAG-based PDF question answering.

Streamlit app: upload a PDF, embed 500-char chunks with a
SentenceTransformer, index them in FAISS, and answer questions by feeding
the top-k similar chunks as context to a Groq-hosted LLM.
"""

import os

import faiss
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer

# Load GROQ_API_KEY (and any other settings) from a local .env file.
load_dotenv()

# SECURITY: never hard-code API keys in source. The original embedded a
# live key; it must be rotated and supplied via the environment instead.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Sentence-embedding model; all-MiniLM-L6-v2 emits 384-dim vectors.
model = SentenceTransformer("all-MiniLM-L6-v2")

EMBEDDING_DIM = 384   # must match the model's output dimension
CHUNK_SIZE = 500      # characters per text chunk

index = faiss.IndexFlatL2(EMBEDDING_DIM)

# Chunks backing the FAISS index, in insertion order: FAISS returns row
# ids, and stored_chunks[id] is the corresponding text.
stored_chunks: list = []


def process_pdf(pdf_file, chunk_size=CHUNK_SIZE):
    """Extract text from *pdf_file*, chunk it, embed and index the chunks.

    Args:
        pdf_file: file-like object accepted by ``PyPDF2.PdfReader``.
        chunk_size: characters per chunk (default ``CHUNK_SIZE``).

    Returns:
        ``(chunks, embeddings)`` — the text chunks and their embedding
        matrix, after the embeddings have been added to the module index.
    """
    pdf_reader = PdfReader(pdf_file)
    # extract_text() returns None for image-only pages; coalesce to "".
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    embeddings = model.encode(chunks)
    index.add(embeddings)
    return chunks, embeddings


def query_model(query, k=3):
    """Answer *query* using the top-*k* indexed chunks as LLM context.

    Args:
        query: the user's question.
        k: number of nearest chunks to retrieve (default 3).

    Returns:
        The model's answer string, or a notice if nothing is indexed yet.
    """
    if index.ntotal == 0:
        return "No document has been indexed yet. Please upload a PDF first."
    query_vector = model.encode([query])
    # Never ask for more neighbours than the index holds.
    _, indices = index.search(query_vector, k=min(k, index.ntotal))
    # FAISS pads absent neighbours with -1; skip them rather than letting
    # a -1 silently index the last stored chunk.
    context = " ".join(stored_chunks[idx] for idx in indices[0] if idx >= 0)
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": f"Context: {context}\n\nQuery: {query}",
            }
        ],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.title("RAG-based PDF Question Answering")
st.write("Upload a PDF and ask questions based on its content.")

uploaded_file = st.file_uploader("Upload your PDF", type=["pdf"])

if uploaded_file:
    # NOTE(review): Streamlit re-executes this script on every interaction,
    # so the PDF is re-embedded on each rerun (the index is also rebuilt,
    # so no duplicates accumulate). Consider st.cache_resource /
    # st.session_state to embed only once per upload.
    chunks, _ = process_pdf(uploaded_file)
    stored_chunks[:] = chunks
    st.success("PDF processed and embeddings created.")

    query = st.text_input("Ask a question:")
    if query:
        answer = query_model(query)
        st.write("### Answer:")
        st.write(answer)