import os
from dotenv import load_dotenv
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
# ChatGroq ships in the langchain-groq package, not langchain_community.llms
from langchain_groq import ChatGroq
# Read API keys from a local .env file (expects GROQ_API_KEY and HUGGINGFACEHUB_API_TOKEN)
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
hf_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
# Load a PDF, index it in FAISS, and build a RetrievalQA chain over it
def create_qa_chain_from_pdf(pdf_path):
    # Parse the PDF into one Document per page
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split pages into overlapping chunks so each fits comfortably in the prompt
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = splitter.split_documents(documents)

    # Embed the chunks and store them in an in-memory FAISS index
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-m3")
    vectorstore = FAISS.from_documents(texts, embeddings)

    # Groq-hosted Llama 3 model used to answer questions
    llm = ChatGroq(
        model="llama3-8b-8192",
        temperature=0.3,
        api_key=groq_api_key,
    )

    # "stuff" chain: the retrieved chunk(s) are placed directly into a single prompt
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 1}),
        return_source_documents=True,
    )
    return qa_chain
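
# Minimal usage sketch, not part of the original app: "sample.pdf" and the question
# below are placeholders. With return_source_documents=True, RetrievalQA returns a
# dict holding the answer under "result" and the retrieved chunks under "source_documents".
if __name__ == "__main__":
    qa_chain = create_qa_chain_from_pdf("sample.pdf")
    response = qa_chain.invoke({"query": "What is this document about?"})
    print(response["result"])
    for doc in response["source_documents"]:
        print(doc.metadata.get("page"), doc.page_content[:200])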