DevBM committed on
Commit
ef6e0e7
·
verified ·
1 Parent(s): a767206

initial commit

Browse files
Files changed (2) hide show
  1. app.py +143 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.memory import ConversationBufferMemory
8
+ from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
9
+ from langchain.llms.huggingface_hub import HuggingFaceHub
10
+
11
+ css = '''
12
+ <style>
13
+ .chat-message {
14
+ padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
15
+ }
16
+ .chat-message.user {
17
+ background-color: #2b313e
18
+ }
19
+ .chat-message.bot {
20
+ background-color: #475063
21
+ }
22
+ .chat-message .avatar {
23
+ width: 20%;
24
+ }
25
+ .chat-message .avatar img {
26
+ max-width: 78px;
27
+ max-height: 78px;
28
+ border-radius: 50%;
29
+ object-fit: cover;
30
+ }
31
+ .chat-message .message {
32
+ width: 80%;
33
+ padding: 0 1.5rem;
34
+ color: #fff;
35
+ }
36
+ '''
37
+
38
+ bot_template = '''
39
+ <div class="chat-message bot">
40
+ <div class="avatar">
41
+ <img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
42
+ </div>
43
+ <div class="message">{{MSG}}</div>
44
+ </div>
45
+ '''
46
+
47
+ user_template = '''
48
+ <div class="chat-message user">
49
+ <div class="avatar">
50
+ <img src="https://i.ibb.co/rdZC7LZ/Photo-logo-1.png">
51
+ </div>
52
+ <div class="message">{{MSG}}</div>
53
+ </div>
54
+ '''
55
+
56
+ st.set_page_config(
57
+ page_icon=':balloon:',
58
+ page_title= 'dump',
59
+ layout='wide'
60
+ )
61
+ st.title(body='*Streamlit*')
62
+
63
def get_pdf_text(pdf_docs):
    """Concatenate the text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects (e.g. Streamlit uploads)
            readable by PyPDF2.PdfReader.

    Returns:
        One string containing all extracted page text, in order.
        Empty string when pdf_docs is empty.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() returns None for pages with no extractable
            # text (e.g. scanned images); guard so `text +=` never
            # raises TypeError on such pages.
            text += page.extract_text() or ""
    return text
70
+
71
def get_text_chunks(text):
    """Split raw document text into overlapping chunks for embedding.

    Splits on newlines into ~1000-character pieces with a 200-character
    overlap so context is preserved across chunk boundaries.

    Args:
        text: the full concatenated document text.

    Returns:
        list of chunk strings produced by CharacterTextSplitter.
    """
    splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
80
+
81
def get_vectorstore(text_chunks):
    """Embed the chunks and index them in an in-memory FAISS store.

    Uses the hkunlp/instructor-xl embedding model; returns the FAISS
    vector store built from text_chunks.
    """
    instructor_embeddings = HuggingFaceInstructEmbeddings(
        model_name='hkunlp/instructor-xl'
    )
    return FAISS.from_texts(texts=text_chunks, embedding=instructor_embeddings)
85
+
86
def get_conversation_chain(vectorstore):
    """Build a conversational retrieval chain over the vector store.

    Wires a HuggingFace Hub LLM (google/flan-t5-xxl) to a retriever over
    `vectorstore`, with a buffer memory keyed 'chat_history' so the chain
    can answer follow-up questions in context.

    Args:
        vectorstore: a langchain VectorStore (here: FAISS).

    Returns:
        A ConversationalRetrievalChain ready to be called with
        {'question': ...}.
    """
    llm = HuggingFaceHub(
        repo_id='google/flan-t5-xxl',
        model_kwargs={"temperature": 0.5, "max_length": 512},
    )
    memory = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    # Bug fix: original called vectorstore.as_retriver() (typo), which
    # raises AttributeError at runtime; the VectorStore API method is
    # as_retriever().
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
    return conversation_chain
101
+
102
def handle_userinput(user_question):
    """Run the question through the stored chain and render the history.

    Calls st.session_state.conversation with the question, stores the
    updated 'chat_history' back into session state, then renders every
    turn as HTML.
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']
    # Turns alternate: even indices are user messages, odd are bot replies.
    for idx, msg in enumerate(st.session_state.chat_history):
        template = user_template if idx % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", msg.content),
                 unsafe_allow_html=True)
110
+
111
def main():
    """Streamlit entry point: wire up session state, chat box and sidebar."""
    load_dotenv()
    st.write(css, unsafe_allow_html=True)

    # Initialise session state on first run so Streamlit reruns keep the
    # conversation chain and history alive.
    for key in ("conversation", "chat_history"):
        if key not in st.session_state:
            st.session_state[key] = None

    st.header("Chat with multiple PDFs :books:")
    question = st.text_input("Ask a question about your documents:")
    if question:
        handle_userinput(question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            label="Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True,
        )
        if st.button('Process'):
            with st.spinner('Processing'):
                # Pipeline: read PDFs -> chunk text -> embed/index ->
                # build the conversational chain and persist it.
                raw_text = get_pdf_text(pdf_docs)
                text_chunks = get_text_chunks(raw_text)
                vectorstore = get_vectorstore(text_chunks)
                st.session_state.conversation = get_conversation_chain(vectorstore)


if __name__ == '__main__':
    main()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ pandas
4
+ PyPDF2
5
+ langchain
6
+ python-dotenv
7
+ faiss-cpu