import os

from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
# NOTE(review): ChatGroq is normally distributed in the separate `langchain_groq`
# package -- confirm this import resolves with the installed langchain_community.
from langchain_community.llms import ChatGroq

# Pull settings from a local .env file (if present) into the process environment
# before the keys below are read.
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
# Not referenced by the code in this module; kept because other modules may
# import it from here.
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")


def create_qa_chain_from_pdf(
    pdf_path,
    *,
    chunk_size=1000,
    chunk_overlap=200,
    embedding_model="BAAI/bge-m3",
    llm_model="llama3-8b-8192",
    temperature=0.3,
    top_k=1,
):
    """Build a retrieval question-answering chain over a single PDF.

    The PDF is loaded with ``PyPDFLoader``, split into overlapping chunks,
    embedded with a HuggingFace model, indexed in an in-memory FAISS store and
    wired to a Groq chat model through ``RetrievalQA`` (chain type ``"stuff"``).

    Args:
        pdf_path: Location of the PDF file, passed straight to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` given to the text splitter.
        chunk_overlap: ``chunk_overlap`` given to the text splitter.
        embedding_model: Model name given to ``HuggingFaceEmbeddings``.
        llm_model: Model name given to ``ChatGroq``.
        temperature: Sampling temperature given to ``ChatGroq``.
        top_k: Number of chunks the retriever is asked for per query
            (``search_kwargs={"k": top_k}``).

    Returns:
        The ``RetrievalQA`` chain, configured with
        ``return_source_documents=True``.

    Raises:
        ValueError: If the PDF produced no text chunks to index.
    """
    pages = PyPDFLoader(pdf_path).load()

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    chunks = splitter.split_documents(pages)
    if not chunks:
        # Nothing to embed means no index can be built; report that directly
        # instead of letting the vector store fail with an unrelated-looking
        # error further down.
        raise ValueError(
            f"No extractable text found in PDF: {pdf_path!r}"
        )

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    vectorstore = FAISS.from_documents(chunks, embeddings)

    llm = ChatGroq(
        model=llm_model,
        temperature=temperature,
        api_key=groq_api_key,
    )

    retriever = vectorstore.as_retriever(search_kwargs={"k": top_k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )