onemriganka committed
Commit e34684a · 1 Parent(s): 5dfc8f2

Create app.py

Files changed (1)
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
+ import streamlit as st
+ from PyPDF2 import PdfReader
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ import google.generativeai as palm
+ from langchain.embeddings import GooglePalmEmbeddings
+ from langchain.llms import GooglePalm
+ from langchain.vectorstores import FAISS
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.memory import ConversationBufferMemory
+ import os
+
+ os.environ['GOOGLE_API_KEY'] = 'AIzaSyAO1uqCO_1CTZV1zgIlUhk5Mv4Ey08cjzI'
+
+ def get_pdf_text(pdf_docs):
+     text=""
+     for pdf in pdf_docs:
+         pdf_reader= PdfReader(pdf)
+         for page in pdf_reader.pages:
+             text+= page.extract_text()
+     return text
+
+ def get_text_chunks(text):
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
+     chunks = text_splitter.split_text(text)
+     return chunks
+
+ def get_vector_store(text_chunks):
+     embeddings = GooglePalmEmbeddings()
+     vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+     return vector_store
+
+ def get_conversational_chain(vector_store):
+     llm=GooglePalm()
+     memory = ConversationBufferMemory(memory_key = "chat_history", return_messages=True)
+     conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vector_store.as_retriever(), memory=memory)
+     return conversation_chain
+
+ def user_input(user_question):
+     response = st.session_state.conversation({'question': user_question})
+     st.session_state.chatHistory = response['chat_history']
+     for i, message in enumerate(st.session_state.chatHistory):
+         if i%2 == 0:
+             st.write("Human: ", message.content)
+         else:
+             st.write("Bot: ", message.content)
+ def main():
+     st.set_page_config("Chat with Multiple PDFs")
+     st.header("Chat with Multiple PDF 💬")
+     user_question = st.text_input("Ask a Question from the PDF Files")
+     if "conversation" not in st.session_state:
+         st.session_state.conversation = None
+     if "chatHistory" not in st.session_state:
+         st.session_state.chatHistory = None
+     if user_question:
+         user_input(user_question)
+     with st.sidebar:
+         st.title("Settings")
+         st.subheader("Upload your Documents")
+         pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Process Button", accept_multiple_files=True)
+         if st.button("Process"):
+             with st.spinner("Processing"):
+                 raw_text = get_pdf_text(pdf_docs)
+                 text_chunks = get_text_chunks(raw_text)
+                 vector_store = get_vector_store(text_chunks)
+                 st.session_state.conversation = get_conversational_chain(vector_store)
+                 st.success("Done")
+
+
+
+ if __name__ == "__main__":
+     main()