import os

import PyPDF2
import faiss
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from groq import Groq

# SECURITY: never hard-code API keys in source. The key previously
# committed here must be considered leaked and revoked; read the key
# from the environment instead.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
client = Groq(api_key=GROQ_API_KEY)


def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    PyPDF2's ``extract_text()`` returns ``None`` for pages with no
    extractable text (e.g. scanned/image-only pages); those pages are
    skipped instead of raising ``TypeError`` on string concatenation.
    """
    text = ""
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            # `or ""` guards against extract_text() returning None.
            text += page.extract_text() or ""
    return text


def process_text_with_langchain(text):
    """Split *text* into overlapping chunks and embed them into a FAISS index.

    Returns a ``(vectorstore, chunks)`` tuple: the LangChain FAISS
    vector store and the raw list of text chunks that were indexed.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = text_splitter.split_text(text)

    # HuggingFaceEmbeddings() downloads/loads a sentence-transformer
    # model; this is the expensive step, so callers should cache the
    # resulting vectorstore rather than rebuilding it per interaction.
    embeddings = HuggingFaceEmbeddings()
    vectorstore = FAISS.from_texts(chunks, embeddings)
    return vectorstore, chunks


def query_faiss_index(query, vectorstore):
    """Return the page contents of the top-3 chunks most similar to *query*."""
    docs = vectorstore.similarity_search(query, k=3)
    return [doc.page_content for doc in docs]


def ask_groq(query):
    """Send *query* to the Groq LLM and return the model's reply text."""
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": query,
            }
        ],
        model="llama3-8b-8192",
        stream=False,
    )
    return chat_completion.choices[0].message.content


# --- Streamlit app ------------------------------------------------------
st.title("RAG-Based Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")

if uploaded_file is not None:
    # Streamlit reruns this entire script on every widget interaction.
    # Without caching, the PDF would be re-written, re-chunked, and
    # re-embedded on every question. Cache the index in session_state
    # and only rebuild it when a different file is uploaded.
    if st.session_state.get("indexed_file") != uploaded_file.name:
        with open("uploaded_file.pdf", "wb") as f:
            f.write(uploaded_file.read())

        st.info("Processing the PDF...")
        text = extract_text_from_pdf("uploaded_file.pdf")
        vectorstore, chunks = process_text_with_langchain(text)
        st.session_state["indexed_file"] = uploaded_file.name
        st.session_state["vectorstore"] = vectorstore
        st.success("PDF processed and indexed successfully!")

    vectorstore = st.session_state["vectorstore"]

    query = st.text_input("Ask a question about the document")
    if query:
        st.info("Searching relevant chunks...")
        relevant_chunks = query_faiss_index(query, vectorstore)
        context = "\n".join(relevant_chunks)

        st.info("Getting response from the language model...")
        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
        st.success(response)