Chat-With-PDF / app.py
polaris404's picture
Files Added
2421315 verified
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_community.llms import OpenAI
from langchain_community.vectorstores import FAISS
from langchain_community.callbacks import get_openai_callback
from langchain.chains.question_answering import load_qa_chain
import streamlit as st
from PyPDF2 import PdfReader
from dotenv import load_dotenv
load_dotenv()
import re
##! Converting PDF to text ##
# def read_pdf(pdf):
# pdf_reader = PdfReader(pdf)
# text = ""
# for page in pdf_reader.pages:
# text += page.extract_text()
# return text
##! Saving vectorstore locally ##
# def save_vectorstore(title, vector_store):
# title = re.sub('\s+', '-', title)
# title = re.sub('[^a-zA-Z0-9\-]+', '', title)
# vector_store.save_local(f"./vectordb/{title[:35]}")
# print(f"{title} stored!")
# return True
##* Load vectorstore ##
def load_vectorstore(name, embeddings):
    """Load a FAISS index stored on disk under ``./vectordb/<name>/``.

    Args:
        name: Directory name of the saved index inside ``./vectordb``.
        embeddings: Embeddings object forwarded to ``FAISS.load_local``.

    Returns:
        The vector store object produced by ``FAISS.load_local``.
    """
    index_dir = f"./vectordb/{name}/"
    # NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in
    # flag of FAISS.load_local; per the LangChain docs it permits loading
    # pickled data, so only point this at index folders the project created.
    return FAISS.load_local(
        index_dir,
        embeddings=embeddings,
        allow_dangerous_deserialization=True,
    )
def _vectorstore_dir_name(title):
    """Convert a PDF title into the directory name used under ``./vectordb``.

    Each run of whitespace becomes a single hyphen, then every character that
    is not an ASCII letter, digit or hyphen is removed.
    """
    # Raw strings keep ``\s`` and ``\-`` as regex escapes instead of invalid
    # Python string escapes.
    hyphenated = re.sub(r'\s+', '-', title)
    return re.sub(r'[^a-zA-Z0-9\-]+', '', hyphenated)


def main():
    """Render the Streamlit page and answer a question about a bundled PDF.

    The user picks one of the pre-indexed PDFs and types a question. The
    matching FAISS index is loaded, the three most similar chunks are
    retrieved, and an OpenAI "stuff" question-answering chain produces the
    answer, which is written to the page. Token usage reported by the OpenAI
    callback is printed to stdout.

    Nothing is searched unless both a PDF and a question are provided; a
    question without a selected PDF only triggers a warning.
    """
    st.header('Chat with PDF 💬')
    # pdf = st.file_uploader("Upload PDF", type='pdf')
    embeddings = OpenAIEmbeddings()

    if 'clicked' not in st.session_state:
        st.session_state.clicked = False

    # Only referenced by the disabled 'Load' button below; kept so the upload
    # flow can be switched back on.
    def click_button():
        st.session_state.clicked = True
    # st.button('Load', on_click=click_button)

    with st.container(border=True):
        st.markdown(
            "\n*Disclaimer: Section for uploading the PDF file has been "
            "removed as the API calls for OpenAI are not free. I've included "
            "few pdfs for Q&A. You can access the source code and enable the "
            "section for uploading PDFs.*\n"
        )

    ##! Converting text to word Embeddings ##
    # if st.session_state.clicked:
    #     if pdf is not None:
    #         text = read_pdf(pdf)
    #         text_splitter = RecursiveCharacterTextSplitter(
    #             chunk_size=1000,
    #             chunk_overlap=200,
    #             length_function=len
    #         )
    #         chunks = text_splitter.split_text(text=text)
    #         vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    #         if save_btn:
    #             save_vectorstore(pdf.name[:-4], vector_store)

    option = st.selectbox(
        label="Select the PDF: ",
        options=['Budget Speech 2024', 'The 100 Page Machine Learning Book'],
        index=None,
    )
    query = st.text_input("Ask questions from your PDF file:")

    # Without a selected PDF there is no index to search, so stop here rather
    # than falling through to a search on a placeholder value.
    if not option:
        if query:
            st.warning("Please Select a PDF")
        return

    vector_store = load_vectorstore(_vectorstore_dir_name(option), embeddings)

    if not query:
        return

    docs = vector_store.similarity_search(query=query, k=3)
    llm = OpenAI(temperature=0)
    chain = load_qa_chain(llm=llm, chain_type="stuff")
    with get_openai_callback() as cb:
        response = chain.invoke({"input_documents": docs, "question": query})
        # Usage summary collected by the callback goes to the server log.
        print(cb)
    st.write(response["output_text"])
# Start the app only when this file is run as a script, not when it is imported.
if __name__ == '__main__':
    main()