Aditya757864 committed on
Commit
97bce8f
1 Parent(s): 301c5b6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py CHANGED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
5
+ from langchain.embeddings import GooglePalmEmbeddings
6
+ from langchain.llms import GooglePalm
7
+ from langchain.vectorstores import FAISS
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.memory import ConversationBufferMemory
10
+
11
# SECURITY: never hard-code API credentials in source control. The key that
# used to be assigned on this line has been published and should be treated
# as compromised and revoked. Supply the key through the GOOGLE_API_KEY
# environment variable (or the hosting platform's secret store) before the
# app is started.
if not os.environ.get('GOOGLE_API_KEY'):
    import warnings

    # Warn instead of raising so the module stays importable; the Google PaLM
    # clients created later are expected to need this variable.
    warnings.warn(
        "GOOGLE_API_KEY is not set; export it before using the Google PaLM "
        "embeddings and LLM.",
        RuntimeWarning,
    )
12
+
13
+
14
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources that ``PdfReader`` can open (the
            caller passes the uploaded files). ``None`` or an empty iterable
            produces an empty string.

    Returns:
        One string holding the text of all pages, in document and page order.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Guard against a falsy result (None or "") for pages without a
            # text layer so one such page cannot break the whole upload.
            # NOTE(review): confirm whether the installed PyPDF2 can return None.
            page_texts.append(page.extract_text() or "")
    # A single join avoids repeated string re-allocation on large documents.
    return "".join(page_texts)
21
+
22
+
23
def get_text_chunks(text, chunk_size=1000, chunk_overlap=20):
    """Split ``text`` into overlapping chunks for embedding.

    Args:
        text: The raw text to split. Empty or ``None`` yields an empty list.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to the previously
            hard-coded 1000.
        chunk_overlap: Overlap between consecutive chunks, forwarded to the
            splitter. Defaults to the previously hard-coded 20.

    Returns:
        A list of chunk strings.
    """
    if not text:
        # Nothing to split; skip constructing the splitter.
        return []
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_text(text)
27
+
28
+
29
def get_vector_store(text_chunks):
    """Embed the chunks with Google PaLM embeddings and index them in FAISS.

    Args:
        text_chunks: Non-empty list of text chunks to embed.

    Returns:
        The FAISS vector store built from ``text_chunks``.

    Raises:
        ValueError: If ``text_chunks`` is empty. An index cannot be built from
            zero texts, and failing here gives a clear message instead of an
            opaque error from inside the index construction.
    """
    if not text_chunks:
        raise ValueError(
            "No text chunks to index; the uploaded PDFs contained no extractable text."
        )
    embeddings = GooglePalmEmbeddings()
    return FAISS.from_texts(text_chunks, embedding=embeddings)
33
+
34
+
35
def get_conversational_chain(vector_store):
    """Create a retrieval chat chain over ``vector_store``.

    The chain uses a Google PaLM LLM, the store's default retriever, and a
    buffer memory that records the dialogue under the ``chat_history`` key as
    message objects.
    """
    palm_llm = GooglePalm()
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    doc_retriever = vector_store.as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=palm_llm,
        retriever=doc_retriever,
        memory=history,
    )
40
+
41
+
42
def user_input(user_question):
    """Ask the conversation chain a question and render the chat transcript.

    The chain's returned ``chat_history`` is stored in
    ``st.session_state.chatHistory``. It is written out in the left column
    and offered as a plain-text download in the right column.

    Args:
        user_question: The question typed by the user.
    """
    conversation = st.session_state.get("conversation")
    if conversation is None:
        # No documents have been processed yet, so there is no chain to call;
        # calling None would raise TypeError.
        st.warning("Please upload your PDF files and click Process before asking a question.")
        return

    with st.container():
        response = conversation({'question': user_question})
        st.session_state.chatHistory = response['chat_history']
        left, right = st.columns((2, 1))
        with left:
            # Messages alternate: even indices are labelled as the human turn,
            # odd indices as the bot reply.
            for i, message in enumerate(st.session_state.chatHistory):
                speaker = "Human:" if i % 2 == 0 else "Bot:"
                st.write(speaker, message.content)
            st.success("Done !")
        with right:
            file_contents = "".join(
                f"{message.content}\n" for message in st.session_state.chatHistory
            )
            st.download_button(
                "Download chat history👈",
                file_contents,
                file_name="Chat_History.txt",
                mime="text/plain",
            )
60
+
61
+
62
def summary(summarization):
    """Render the summarization result and its download button.

    Args:
        summarization: Truthy when the user requested a summary on this run.
            When falsy, only the download button is rendered, with empty
            content.
    """
    with st.container():
        left, right = st.columns((2, 1))
        response1 = {}
        with left:
            if summarization:
                # Use .get(): the session key may not exist yet, or may be None
                # when no documents have been processed.
                conversation = st.session_state.get("conversation")
                if conversation is None:
                    st.warning(
                        "Please upload your PDF files and click Process before requesting a summary."
                    )
                else:
                    response1 = conversation({'question': 'Retrieve one-line topics and their descriptors; create detailed, bulleted summaries for each topic.'})
                    st.write("summary:\n", response1['answer'])
                    st.success("Done !")

        with right:
            file_contents = response1.get('answer', '')
            file_name = "summarization_result.txt"
            st.download_button("Download summery👈", file_contents, file_name=file_name, mime="text/plain")
78
+
79
+
80
def main():
    """Render the Streamlit page: upload sidebar, summarization, and Q&A."""
    st.set_page_config("Chat with Multiple PDFs")

    # Initialise session state before any section reads it; the summarization
    # section runs before the question section and also needs these keys.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chatHistory" not in st.session_state:
        st.session_state.chatHistory = None

    st.header("Chat with Multiple PDF 💬")
    st.write("---")
    with st.container():
        with st.sidebar:
            st.title("Settings")
            st.subheader("Upload your Documents")
            pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Process Button", accept_multiple_files=True)
            if st.button("Process"):
                if not pdf_docs:
                    # Nothing uploaded: there is no text to index.
                    st.warning("Please upload at least one PDF file before clicking Process.")
                else:
                    with st.spinner("Processing"):
                        raw_text = get_pdf_text(pdf_docs)
                        text_chunks = get_text_chunks(raw_text)
                        if not text_chunks:
                            # No extractable text, so no vector index can be built.
                            st.warning("No extractable text was found in the uploaded files.")
                        else:
                            vector_store = get_vector_store(text_chunks)
                            st.session_state.conversation = get_conversational_chain(vector_store)
                            st.success("Done")
    with st.container():
        # Summarization Section
        st.subheader("PDF Summarization")
        st.write('Click on summary button to get summary on given uploaded file.')
        summarization = st.button("Summarize 👍")
        if summarization and st.session_state.conversation is None:
            # No chain to query yet; still render the section, without a request.
            st.warning("Please upload your PDF files and click Process before requesting a summary.")
            summarization = False
        summary(summarization)

    st.write("#")
    st.write("---")

    with st.container():
        # Question Section
        st.subheader("PDF question-answer section")
        user_question = st.text_input("Ask a Question from the PDF Files")
        if user_question:
            if st.session_state.conversation is None:
                st.warning("Please upload your PDF files and click Process before asking a question.")
            else:
                user_input(user_question)
116
+
117
+
118
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    main()