alaahilal committed on
Commit
9be167d
·
verified ·
1 Parent(s): 457f4e0

uploaded the files

Browse files
Files changed (3) hide show
  1. app.py +171 -0
  2. htmlTemplates.py +44 -0
  3. requirements.txt +95 -0
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from dotenv import load_dotenv
3
+ from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
+ from langchain.vectorstores import FAISS
7
+ from langchain.chat_models import ChatOpenAI
8
+ from langchain.memory import ConversationBufferMemory
9
+ from langchain.chains import ConversationalRetrievalChain
10
+ from htmlTemplates import css, bot_template, user_template
11
+ from langchain.llms import HuggingFaceHub
12
+ import os
13
+
14
# Load .env before reading the key: this assignment runs at import time,
# i.e. before main() gets a chance to call load_dotenv(), so without this
# call a key defined only in .env would be captured here as None.
load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
15
+
16
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every given PDF.

    Args:
        pdf_docs: Iterable of PDF sources, each passed to ``PdfReader``.
            ``None`` or an empty iterable is treated as "no documents".

    Returns:
        One string holding the page texts joined in document/page order, or
        the empty string when there is nothing to read.
    """
    page_texts = []
    for pdf in pdf_docs or ():
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # Fall back to "" in case extract_text() yields None for a page;
            # concatenating None onto a str would raise TypeError.
            page_texts.append(page.extract_text() or "")
    # Join once instead of growing a string with += inside the loop.
    return "".join(page_texts)
23
+
24
+
25
def get_text_chunks(text):
    """Split ``text`` into overlapping chunks with ``CharacterTextSplitter``.

    The splitter is configured with a newline separator, a chunk size of
    1000, an overlap of 200, and ``len`` as the length function.

    Args:
        text: The full text to split.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``.
    """
    splitter_options = {
        "separator": "\n",
        "chunk_size": 1000,
        "chunk_overlap": 200,
        "length_function": len,
    }
    return CharacterTextSplitter(**splitter_options).split_text(text)
34
+
35
+
36
def get_vectorstore(text_chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        text_chunks: Non-empty list of text chunks to embed.

    Returns:
        The store built by ``FAISS.from_texts`` using ``OpenAIEmbeddings``.

    Raises:
        ValueError: If ``text_chunks`` is empty. Raised up front so the
            caller gets a clear message instead of a failure from inside
            the embedding/indexing call.
    """
    if not text_chunks:
        raise ValueError(
            "Cannot build a vector store: no text chunks were provided.")

    embeddings = OpenAIEmbeddings()
    # Alternative embedding backend left by the author:
    # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
    return FAISS.from_texts(texts=text_chunks, embedding=embeddings)
41
+
42
+
43
def get_conversation_chain(vectorstore):
    """Build a ``ConversationalRetrievalChain`` backed by ``vectorstore``.

    The chain uses a ``ChatOpenAI`` model, the retriever obtained from
    ``vectorstore.as_retriever()``, and a ``ConversationBufferMemory`` stored
    under the ``chat_history`` key with ``return_messages=True``.

    Args:
        vectorstore: Object exposing ``as_retriever()``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    chat_model = ChatOpenAI()
    # Alternative model left by the author:
    # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})

    history_buffer = ConversationBufferMemory(
        memory_key='chat_history',
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectorstore.as_retriever(),
        memory=history_buffer,
    )
55
+
56
+
57
def handle_userinput(user_question):
    """Send the question to the conversation chain and render the chat.

    Stores the chain's ``chat_history`` in ``st.session_state.chat_history``
    and writes every message using ``user_template`` for even positions and
    ``bot_template`` for odd positions.

    Args:
        user_question: The question text entered by the user.

    Returns:
        None. If no conversation chain exists yet, a warning is shown and
        nothing else happens.
    """
    # Local import keeps this block self-contained.
    import html

    conversation = st.session_state.conversation
    if conversation is None:
        # The chain is only created after "Process" is clicked; calling
        # None would otherwise raise "'NoneType' object is not callable".
        st.warning(
            "Please upload and process your documents before asking a question.")
        return

    response = conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The text is spliced into raw HTML that is rendered with
        # unsafe_allow_html=True, so escape it to stop user- or
        # document-supplied markup from being interpreted by the browser.
        safe_content = html.escape(message.content)
        st.write(template.replace("{{MSG}}", safe_content),
                 unsafe_allow_html=True)
68
+
69
+
70
def main():
    """Run the Streamlit page: question box plus PDF upload sidebar.

    Loads environment variables, configures the page, initialises the
    ``conversation`` and ``chat_history`` session entries, answers the
    current question (if any), and on "Process" turns the uploaded PDFs into
    a conversation chain stored in ``st.session_state.conversation``.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
        if st.button("Process"):
            if not pdf_docs:
                # Nothing uploaded: there is no text to embed or index.
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get pdf text
                    raw_text = get_pdf_text(pdf_docs)

                    # get the text chunks
                    text_chunks = get_text_chunks(raw_text)

                    if not text_chunks:
                        # No text came out of the PDFs, so there is nothing
                        # to build a vector store from.
                        st.warning(
                            "No extractable text was found in the uploaded PDFs.")
                    else:
                        # create vector store
                        vectorstore = get_vectorstore(text_chunks)

                        # create conversation chain
                        st.session_state.conversation = get_conversation_chain(
                            vectorstore)


if __name__ == '__main__':
    main()
108
+
109
+
110
+
111
+
112
+ # Attempting uninstall: tokenizers
113
+ # Found existing installation: tokenizers 0.15.2
114
+ # Uninstalling tokenizers-0.15.2:
115
+ # Successfully uninstalled tokenizers-0.15.2
116
+ # Attempting uninstall: faiss-cpu
117
+ # Found existing installation: faiss-cpu 1.8.0.post1
118
+ # Uninstalling faiss-cpu-1.8.0.post1:
119
+ # Successfully uninstalled faiss-cpu-1.8.0.post1
120
+ # Attempting uninstall: python-dotenv
121
+ # Found existing installation: python-dotenv 1.0.1
122
+ # Uninstalling python-dotenv-1.0.1:
123
+ # Successfully uninstalled python-dotenv-1.0.1
124
+ # Attempting uninstall: pydantic
125
+ # Found existing installation: pydantic 2.10.2
126
+ # Uninstalling pydantic-2.10.2:
127
+ # Successfully uninstalled pydantic-2.10.2
128
+ # Attempting uninstall: protobuf
129
+ # Found existing installation: protobuf 4.25.5
130
+ # Uninstalling protobuf-4.25.5:
131
+ # Successfully uninstalled protobuf-4.25.5
132
+ # Attempting uninstall: torch
133
+ # Found existing installation: torch 2.1.1
134
+ # Uninstalling torch-2.1.1:
135
+ # Successfully uninstalled torch-2.1.1
136
+ # Attempting uninstall: tiktoken
137
+ # Found existing installation: tiktoken 0.7.0
138
+ # Uninstalling tiktoken-0.7.0:
139
+ # Successfully uninstalled tiktoken-0.7.0
140
+ # Attempting uninstall: huggingface-hub
141
+ # Found existing installation: huggingface-hub 0.23.4
142
+ # Uninstalling huggingface-hub-0.23.4:
143
+ # Successfully uninstalled huggingface-hub-0.23.4
144
+ # Attempting uninstall: dataclasses-json
145
+ # Found existing installation: dataclasses-json 0.6.7
146
+ # Uninstalling dataclasses-json-0.6.7:
147
+ # Successfully uninstalled dataclasses-json-0.6.7
148
+ # Attempting uninstall: transformers
149
+ # Found existing installation: transformers 4.35.2
150
+ # Uninstalling transformers-4.35.2:
151
+ # Successfully uninstalled transformers-4.35.2
152
+ # Attempting uninstall: openai
153
+ # Found existing installation: openai 1.57.4
154
+ # Uninstalling openai-1.57.4:
155
+ # Successfully uninstalled openai-1.57.4
156
+ # Attempting uninstall: langchain
157
+ # Found existing installation: langchain 0.2.5
158
+ # Uninstalling langchain-0.2.5:
159
+ # Successfully uninstalled langchain-0.2.5
160
+ # Attempting uninstall: sentence-transformers
161
+ # Found existing installation: sentence-transformers 3.0.1
162
+ # Uninstalling sentence-transformers-3.0.1:
163
+ # Successfully uninstalled sentence-transformers-3.0.1
164
+ # Attempting uninstall: altair
165
+ # Found existing installation: altair 5.5.0
166
+ # Uninstalling altair-5.5.0:
167
+ # Successfully uninstalled altair-5.5.0
168
+ # Attempting uninstall: streamlit
169
+ # Found existing installation: streamlit 1.41.1
170
+ # Uninstalling streamlit-1.41.1:
171
+ # Successfully uninstalled streamlit-1.41.1
htmlTemplates.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Stylesheet for the chat bubbles, kept as an HTML snippet so it can be
# written into the page as raw HTML. The style element is explicitly closed
# so the markup is well-formed.
css = '''
<style>
.chat-message {
    padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
    background-color: #2b313e
}
.chat-message.bot {
    background-color: #475063
}
.chat-message .avatar {
  width: 20%;
}
.chat-message .avatar img {
  max-width: 78px;
  max-height: 78px;
  border-radius: 50%;
  object-fit: cover;
}
.chat-message .message {
  width: 80%;
  padding: 0 1.5rem;
  color: #fff;
}
</style>
'''
27
+
28
# HTML skeleton for a bot message. "{{MSG}}" is a placeholder to be replaced
# with the message text. The image carries an alt attribute so the markup is
# valid and usable by screen readers.
bot_template = '''
<div class="chat-message bot">
    <div class="avatar">
        <img src="https://i.ibb.co/cN0nmSj/Screenshot-2023-05-28-at-02-37-21.png" alt="Bot avatar" style="max-height: 78px; max-width: 78px; border-radius: 50%; object-fit: cover;">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''
36
+
37
# HTML skeleton for a user message. "{{MSG}}" is a placeholder to be replaced
# with the message text. The image carries an alt attribute so the markup is
# valid and usable by screen readers.
user_template = '''
<div class="chat-message user">
    <div class="avatar">
        <img src="https://i.ibb.co/rdZC7LZ/Photo-logo-1.png" alt="User avatar">
    </div>
    <div class="message">{{MSG}}</div>
</div>
'''
requirements.txt ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs==2.4.4
2
+ aiohttp==3.11.11
3
+ aiosignal==1.3.2
4
+ altair==4.0.0
5
+ async-timeout==4.0.3
6
+ attrs==24.3.0
7
+ blinker==1.9.0
8
+ cachetools==5.5.0
9
+ certifi==2024.12.14
10
+ charset-normalizer==3.4.1
11
+ click==8.1.8
12
+ colorama==0.4.6
13
+ dataclasses-json==0.5.14
14
+ entrypoints==0.4
15
+ faiss-cpu==1.8.0.post1
16
+ filelock==3.16.1
17
+ frozenlist==1.5.0
18
+ fsspec==2024.12.0
19
+ gitdb==4.0.11
20
+ GitPython==3.1.43
21
+ greenlet==3.1.1
22
+ huggingface-hub==0.14.1
23
+ idna==3.10
24
+ importlib_metadata==8.5.0
25
+ InstructorEmbedding==1.0.1
26
+ Jinja2==3.1.5
27
+ joblib==1.4.2
28
+ jsonschema==4.23.0
29
+ jsonschema-specifications==2024.10.1
30
+ langchain==0.0.184
31
+ markdown-it-py==3.0.0
32
+ MarkupSafe==3.0.2
33
+ marshmallow==3.23.2
34
+ mdurl==0.1.2
35
+ mpmath==1.3.0
36
+ multidict==6.1.0
37
+ mypy-extensions==1.0.0
38
+ networkx==3.2.1
39
+ nltk==3.9.1
40
+ numexpr==2.10.2
41
+ numpy==1.26.4
42
+ openai==0.27.6
43
+ openapi-schema-pydantic==1.2.4
44
+ packaging==24.2
45
+ pandas==2.2.3
46
+ pillow==11.0.0
47
+ propcache==0.2.1
48
+ protobuf==3.20.3
49
+ pyarrow==18.1.0
50
+ pydantic==1.10.19
51
+ pydeck==0.9.1
52
+ Pygments==2.18.0
53
+ Pympler==1.1
54
+ PyPDF2==3.0.1
55
+ python-dateutil==2.9.0.post0
56
+ python-dotenv==1.0.0
57
+ pytz==2024.2
58
+ pywin32==308; sys_platform == "win32"
59
+ PyYAML==6.0.2
60
+ referencing==0.35.1
61
+ regex==2024.11.6
62
+ requests==2.32.3
63
+ rich==13.9.4
64
+ rpds-py==0.22.3
65
+ safetensors==0.4.5
66
+ scikit-learn==1.6.0
67
+ scipy==1.13.1
68
+ semver==3.0.2
69
+ sentence-transformers==2.2.2
70
+ sentencepiece==0.2.0
71
+ six==1.17.0
72
+ smmap==5.0.1
73
+ SQLAlchemy==2.0.36
74
+ streamlit==1.18.1
75
+ sympy==1.13.1
76
+ tenacity==8.5.0
77
+ threadpoolctl==3.5.0
78
+ tiktoken==0.4.0
79
+ tokenizers==0.13.3
80
+ toml==0.10.2
81
+ toolz==1.0.0
82
+ torch==2.5.1
83
+ torchvision==0.20.1
84
+ tornado==6.4.2
85
+ tqdm==4.67.1
86
+ transformers==4.31.0
87
+ typing-inspect==0.9.0
88
+ typing_extensions==4.12.2
89
+ tzdata==2024.2
90
+ tzlocal==5.2
91
+ urllib3==2.3.0
92
+ validators==0.34.0
93
+ watchdog==6.0.0
94
+ yarl==1.18.3
95
+ zipp==3.21.0