from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain.embeddings.openai import OpenAIEmbeddings from langchain_community.llms import OpenAI from langchain_community.vectorstores import FAISS from langchain_community.callbacks import get_openai_callback from langchain.chains.question_answering import load_qa_chain import streamlit as st from PyPDF2 import PdfReader from dotenv import load_dotenv load_dotenv() import re ##! Converting PDF to text ## # def read_pdf(pdf): # pdf_reader = PdfReader(pdf) # text = "" # for page in pdf_reader.pages: # text += page.extract_text() # return text ##! Saving vectorestore locally ## # def save_vectorstore(title, vector_store): # title = re.sub('\s+', '-', title) # title = re.sub('[^a-zA-Z0-9\-]+', '', title) # vector_store.save_local(f"./vectordb/{title[:35]}") # print(f"{title} stored!") # return True ##* Load vectorstore ## def load_vectorstore(name, embeddings): vector_store = FAISS.load_local(f"./vectordb/{name}/", embeddings=embeddings, allow_dangerous_deserialization=True) return vector_store def main(): st.header('Chat with PDF 💬') # pdf = st.file_uploader("Upload PDF", type='pdf') embeddings = OpenAIEmbeddings() vector_store = '' if 'clicked' not in st.session_state: st.session_state.clicked = False def click_button(): st.session_state.clicked = True # st.button('Load', on_click=click_button) with st.container(border=True): st.markdown(''' *Disclaimer: Section for uploading the PDF file has been removed as the API calls for OpenAI are not free. I've included few pdfs for Q&A. You can access the source code and enable the section for uploading PDFs.* ''') ##! Converting text to word Embeddings ## # if st.session_state.clicked: # if pdf is not None: # text = read_pdf(pdf) # text_splitter = RecursiveCharacterTextSplitter( # chunk_size=1000, # chunk_overlap=200, # length_function=len # ) # chunks = text_splitter.split_text(text=text) # vector_store = FAISS.from_texts(chunks, embedding=embeddings) # if save_btn: # save_vectorstore(pdf.name[:-4], vector_store) option = st.selectbox(label="Select the PDF: ", options=['Budget Speech 2024', 'The 100 Page Machine Learning Book'], index=None) query = st.text_input("Ask questions from your PDF file:") if query and not option: st.warning("Please Select a PDF") if option: title = re.sub('\s+', '-', option) title = re.sub('[^a-zA-Z0-9\-]+', '', title) vector_store = load_vectorstore(title, embeddings) if query: docs = vector_store.similarity_search(query=query, k=3) llm = OpenAI(temperature=0) chain = load_qa_chain(llm=llm, chain_type="stuff") with get_openai_callback() as cb: response = chain.invoke({"input_documents": docs, "question":query}) print(cb) st.write(response["output_text"]) if __name__ == '__main__': main()