Npps committed on
Commit
e69f7f6
1 Parent(s): 434f6ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +321 -319
app.py CHANGED
@@ -1,319 +1,321 @@
1
- import streamlit as st
2
- import os
3
- import glob
4
- from typing import Union
5
- from io import BytesIO
6
- from typing import List
7
- from dotenv import load_dotenv
8
- from multiprocessing import Pool
9
- from constants import CHROMA_SETTINGS
10
- import tempfile
11
- from tqdm import tqdm
12
- import argparse
13
- import time
14
- from PIL import Image
15
- from langchain.chains import RetrievalQA
16
- from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
17
- from langchain_community.chat_models import ChatOpenAI
18
- from langchain.chains import ConversationalRetrievalChain
19
- from langchain.docstore.document import Document
20
- from langchain_community.embeddings import OpenAIEmbeddings
21
- from langchain.memory import ConversationBufferMemory
22
- from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
23
- from langchain_community.vectorstores import FAISS,Chroma
24
- from langchain_community.llms import Ollama
25
- from langchain_cohere import CohereEmbeddings
26
-
27
- load_dotenv()
28
-
29
-
30
- ######################### HTML CSS ############################
31
- css = '''
32
- <style>
33
- .chat-message {
34
- padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
35
- }
36
- .chat-message.user {
37
- background-color: #2b313e
38
- }
39
- .chat-message.bot {
40
- background-color: #475063
41
- }
42
- .chat-message .avatar {
43
- width: 20%;
44
- }
45
- .chat-message .avatar img {
46
- max-width: 78px;
47
- max-height: 78px;
48
- border-radius: 50%;
49
- object-fit: cover;
50
- }
51
- .chat-message .message {
52
- width: 80%;
53
- padding: 0 1.5rem;
54
- color: #fff;
55
- }
56
- '''
57
-
58
- bot_template = '''
59
- <div class="chat-message bot">
60
- <div class="avatar">
61
- <img src="https://i.pinimg.com/originals/0c/67/5a/0c675a8e1061478d2b7b21b330093444.gif" style="max-height: 70px; max-width: 50px; border-radius: 50%; object-fit: cover;">
62
- </div>
63
- <div class="message">{{MSG}}</div>
64
- </div>
65
- '''
66
-
67
-
68
- user_template = '''
69
- <div class="chat-message user">
70
- <div class="avatar">
71
- <img src="https://th.bing.com/th/id/OIP.uDqZFTOXkEWF9PPDHLCntAHaHa?pid=ImgDet&rs=1" style="max-height: 80px; max-width: 50px; border-radius: 50%; object-fit: cover;">
72
- </div>
73
- <div class="message">{{MSG}}</div>
74
- </div>
75
- '''
76
- ###################################################
77
-
78
- chunk_size = 500
79
- chunk_overlap = 50
80
- persist_directory = os.environ.get('PERSIST_DIRECTORY')
81
- print(persist_directory)
82
- source_directory = os.environ.get('SOURCE_DIRECTORY', 'source_documents')
83
- target_source_chunks= int(os.environ.get('TARGET_SOURCE_CHUNKS', 5))
84
- embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME')
85
- model_type=os.environ.get('MODEL_TYPE')
86
-
87
-
88
- from langchain_community.document_loaders import (
89
- CSVLoader,
90
- PyMuPDFLoader,
91
- TextLoader)
92
-
93
-
94
- # Map file extensions to document loaders and their arguments
95
- LOADER_MAPPING = {
96
- ".csv": (CSVLoader, {}),
97
- ".pdf": (PyMuPDFLoader, {}),
98
- ".txt": (TextLoader, {"encoding": "utf8"}),
99
- }
100
-
101
-
102
-
103
-
104
-
105
-
106
def load_single_document(file_content: BytesIO, file_type: str) -> List[Document]:
    """Load one uploaded file into LangChain documents.

    The upload is written to a temporary file because the loaders in
    ``LOADER_MAPPING`` are constructed from a filesystem path.

    Args:
        file_content: In-memory bytes of the uploaded file.
        file_type: Type string of the upload; the text after the last ``/``
            becomes the extension (``"application/pdf"`` -> ``".pdf"``).
            A value without ``/`` is treated as the bare extension.

    Returns:
        The documents produced by the loader registered for the extension.

    Raises:
        ValueError: If no loader is registered for the derived extension.
    """
    # [-1] instead of [1]: a type string without "/" no longer raises IndexError.
    ext = "." + file_type.rsplit("/", 1)[-1]

    # Reject unsupported types before touching the filesystem so that no
    # temporary file is left behind.
    if ext not in LOADER_MAPPING:
        raise ValueError(f"Unsupported file extension '{ext}'")
    loader_class, loader_args = LOADER_MAPPING[ext]

    # delete=False: the file is closed here and reopened by path by the loader.
    with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as temp_file:
        temp_file.write(file_content.getvalue())
        temp_file_path = temp_file.name

    try:
        return loader_class(temp_file_path, **loader_args).load()
    finally:
        # Remove the temporary file even when the loader raises.
        os.remove(temp_file_path)
121
-
122
-
123
-
124
def load_uploaded_documents(uploaded_files, uploaded_files_type, ignored_files: Union[List[str], None] = None) -> List[Document]:
    """Load every uploaded file and return the combined documents.

    Files are loaded one after another in the calling process; no worker
    pool is created because nothing was ever dispatched to one.

    Args:
        uploaded_files: Sized sequence of uploads; each must provide ``read()``.
        uploaded_files_type: Type string of each upload, in the same order.
        ignored_files: Accepted for backward compatibility; not consulted.

    Returns:
        A flat list holding the documents of all uploads, in upload order.
    """
    results = []
    with tqdm(total=len(uploaded_files), desc='Loading new documents', ncols=80) as progress_bar:
        for uploaded_file, file_type in zip(uploaded_files, uploaded_files_type):
            file_content = BytesIO(uploaded_file.read())
            results.extend(load_single_document(file_content, file_type))
            progress_bar.update()
    return results
135
-
136
-
137
def get_pdf_text(uploaded_files):
    """Return the documents loaded from all *uploaded_files*.

    Each upload's ``type`` attribute is forwarded alongside the upload itself
    to ``load_uploaded_documents``; the ignore list passed on is empty.
    """
    files = list(uploaded_files)
    file_types = [upload.type for upload in uploaded_files]
    nothing_ignored = []  # Add files to ignore if needed
    return load_uploaded_documents(files, file_types, nothing_ignored)
144
-
145
-
146
-
147
-
148
def does_vectorstore_exist(persist_directory: str) -> bool:
    """Report whether a populated vectorstore is present on disk.

    The store counts as existing only when ``persist_directory`` contains an
    ``index`` sub-directory, both ``chroma-collections.parquet`` and
    ``chroma-embeddings.parquet``, and at least one ``.bin`` or ``.pkl``
    file inside ``index``.

    Args:
        persist_directory: Directory to inspect. ``None`` (for example an
            unset ``PERSIST_DIRECTORY``) is reported as "no store" instead
            of raising ``TypeError``.

    Returns:
        ``True`` if all of the files above are found, ``False`` otherwise.
    """
    if persist_directory is None:
        return False

    index_dir = os.path.join(persist_directory, 'index')
    if not os.path.exists(index_dir):
        return False

    required = ('chroma-collections.parquet', 'chroma-embeddings.parquet')
    if not all(os.path.exists(os.path.join(persist_directory, name)) for name in required):
        return False

    # At least one index file is needed in a working vectorstore.
    index_files = glob.glob(os.path.join(persist_directory, 'index/*.bin'))
    index_files += glob.glob(os.path.join(persist_directory, 'index/*.pkl'))
    if not index_files:
        return False

    print("Yes vectorstore exists")
    return True
161
-
162
-
163
-
164
def get_text_chunks(results, chunk_size, chunk_overlap):
    """Split *results* into chunks with a ``RecursiveCharacterTextSplitter``.

    ``chunk_size`` and ``chunk_overlap`` are passed straight to the splitter;
    the return value is whatever its ``split_documents`` call yields.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(results)
169
-
170
-
171
def get_vectorstore(results, embeddings_model_name, persist_directory, client_settings, chunk_size, chunk_overlap):
    """Chunk *results* and store them in a persisted Chroma vectorstore.

    When ``does_vectorstore_exist`` reports a store in ``persist_directory``
    the chunks are appended to it; otherwise a new store is created from them.

    Args:
        results: Documents to chunk and index.
        embeddings_model_name: ``"openai"`` or ``"Cohereembeddings"``.
        persist_directory: Directory of the persisted store.
        client_settings: Chroma client settings; when ``None`` the module-level
            ``CHROMA_SETTINGS`` is used.
        chunk_size: Passed to ``get_text_chunks``.
        chunk_overlap: Passed to ``get_text_chunks``.

    Returns:
        The ``Chroma`` instance holding the documents.

    Raises:
        ValueError: If ``embeddings_model_name`` is not one of the two
            supported names.
    """
    if embeddings_model_name == "openai":
        embeddings = OpenAIEmbeddings()
        print('OpenAI embeddings loaded')
    elif embeddings_model_name == "Cohereembeddings":
        embeddings = CohereEmbeddings()
        print('Cohere embeddings loaded')
    else:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError(
            f"Unsupported embeddings model '{embeddings_model_name}'. "
            "Please choose one of the following: openai, Cohereembeddings"
        )

    settings = client_settings if client_settings is not None else CHROMA_SETTINGS
    texts = get_text_chunks(results, chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    if does_vectorstore_exist(persist_directory):
        # Update and store locally vectorstore
        print(f"Appending to existing vectorstore at {persist_directory}")
        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=settings)
        if texts:
            db.add_documents(texts)
    else:
        # Create and store locally vectorstore
        print("Creating new vectorstore")
        print("Creating embeddings. May take some minutes...")
        # from_documents already inserts the chunks; adding them a second
        # time would store every chunk twice.
        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=settings)

    return db
199
-
200
-
201
def get_conversation_chain(vectorstore, target_source_chunks, model_type):
    """Build a conversational retrieval chain over *vectorstore*.

    Args:
        vectorstore: Store providing ``as_retriever``.
        target_source_chunks: Passed to the retriever as ``k`` in
            ``search_kwargs``.
        model_type: ``"OpenAI"`` (the historical misspelling ``"OpenaAI"`` is
            still accepted) or ``"Llama3"``.

    Returns:
        A ``ConversationalRetrievalChain`` with a ``ConversationBufferMemory``
        stored under the ``chat_history`` key.

    Raises:
        ValueError: If ``model_type`` is not supported.
    """
    retriever = vectorstore.as_retriever(search_kwargs={"k": target_source_chunks})

    # Prepare the LLM.
    if model_type in ("OpenAI", "OpenaAI"):
        llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
    elif model_type == "Llama3":
        llm = Ollama(model="llama3")
    else:
        # ValueError is a subclass of the bare Exception raised before.
        raise ValueError(
            f"Model type {model_type} is not supported. "
            "Please choose one of the following: OpenAI, Llama3"
        )

    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        memory=memory,
    )
    return conversation_chain
230
-
231
-
232
- st.set_page_config(page_title="Generate Insights",page_icon=":bar_chart:")
233
-
234
-
235
def handle_userinput(user_question):
    """Send *user_question* to the conversation chain and render the history.

    The chain's ``chat_history`` is saved in ``st.session_state`` and written
    out with alternating templates: even positions use ``user_template``, odd
    positions use ``bot_template``.

    Args:
        user_question: Text entered by the user.
    """
    import html  # stdlib; imported locally so the block is self-contained

    # The chain only exists after documents have been processed; calling
    # ``None`` would raise TypeError.
    if st.session_state.conversation is None:
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The text is rendered with unsafe_allow_html, so escape it to keep
        # user input and model output from being interpreted as markup.
        st.write(template.replace("{{MSG}}", html.escape(message.content)), unsafe_allow_html=True)
246
-
247
-
248
-
249
-
250
-
251
def add_logo(logo_path, width, height):
    """Read the image at *logo_path* and return a resized copy.

    Args:
        logo_path: Path of the image file.
        width: Target width passed to ``resize``.
        height: Target height passed to ``resize``.

    Returns:
        The image returned by ``resize((width, height))``.
    """
    # The context manager closes the source file once the copy is made.
    with Image.open(logo_path) as logo:
        return logo.resize((width, height))
256
-
257
- st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
258
- col1, col2,col3,col4,col5,col6 = st.columns(6)
259
-
260
- with col5:
261
- my_logo = add_logo(logo_path="CampusX.jfif", width=100, height=20)
262
- st.image(my_logo)
263
- with col6:
264
- pg_logo=add_logo(logo_path="Q&A logo.jfif", width=60, height=40)
265
- st.image(pg_logo)
266
-
267
-
268
-
269
-
270
-
271
def main():
    """Render the page: question box, chat history and the upload sidebar.

    Pressing "Process" loads the uploaded files, stores them in the
    vectorstore and saves a new conversation chain in ``st.session_state``.
    """
    load_dotenv()
    css2 = '''
    <style>
        [data-testid="stSidebar"]{
            min-width: 300px;
            max-width: 300px;
        }
    </style>
    '''
    st.markdown(css2, unsafe_allow_html=True)

    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header(":blue Generate Insights :bar_chart:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        uploaded_files = st.file_uploader("Upload documents", type=["pdf", "xlsx",'csv'], accept_multiple_files=True)

        if st.button("Process"):
            # With accept_multiple_files=True an empty selection is an empty
            # list, so test truthiness rather than ``is not None``.
            if not uploaded_files:
                st.warning("Please upload at least one document before processing.")
                return

            with st.spinner("Processing"):
                try:
                    raw_text = get_pdf_text(uploaded_files=uploaded_files)
                except ValueError as exc:
                    # Raised for uploads without a registered loader.
                    st.error(str(exc))
                    return

                # get_vectorstore splits the documents itself, so they are
                # passed unsplit instead of being chunked twice.
                vectorstore = get_vectorstore(results=raw_text,embeddings_model_name=embeddings_model_name,persist_directory=persist_directory,client_settings=CHROMA_SETTINGS,chunk_size=chunk_size,chunk_overlap=chunk_overlap)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(vectorstore=vectorstore,target_source_chunks=target_source_chunks,model_type=model_type)
315
-
316
-
317
- if __name__ == '__main__':
318
- main()
319
-
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import glob
4
+ from typing import Union
5
+ from io import BytesIO
6
+ from typing import List
7
+ from dotenv import load_dotenv
8
+ from multiprocessing import Pool
9
+ from constants import CHROMA_SETTINGS
10
+ import tempfile
11
+ from tqdm import tqdm
12
+ import argparse
13
+ import time
14
+ from PIL import Image
15
+ from langchain.chains import RetrievalQA
16
+ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
17
+ from langchain_community.chat_models import ChatOpenAI
18
+ from langchain.chains import ConversationalRetrievalChain
19
+ from langchain.docstore.document import Document
20
+ from langchain_community.embeddings import OpenAIEmbeddings
21
+ from langchain.memory import ConversationBufferMemory
22
+ from langchain.text_splitter import CharacterTextSplitter,RecursiveCharacterTextSplitter
23
+ from langchain_community.vectorstores import FAISS,Chroma
24
+ from langchain_community.llms import Ollama
25
+ from langchain_cohere import CohereEmbeddings
26
+
27
+
28
# Load a local .env file (if any) before the module-level configuration
# further down reads os.environ; main() only calls load_dotenv() later.
load_dotenv()

# Re-export the API keys only when they are set: assigning None to
# os.environ raises TypeError.
for _key in ('LANGCHAIN_API_KEY', 'COHERE_API_KEY'):
    _value = os.getenv(_key)
    if _value is not None:
        os.environ[_key] = _value
31
+
32
+ ######################### HTML CSS ############################
33
+ css = '''
34
+ <style>
35
+ .chat-message {
36
+ padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
37
+ }
38
+ .chat-message.user {
39
+ background-color: #2b313e
40
+ }
41
+ .chat-message.bot {
42
+ background-color: #475063
43
+ }
44
+ .chat-message .avatar {
45
+ width: 20%;
46
+ }
47
+ .chat-message .avatar img {
48
+ max-width: 78px;
49
+ max-height: 78px;
50
+ border-radius: 50%;
51
+ object-fit: cover;
52
+ }
53
+ .chat-message .message {
54
+ width: 80%;
55
+ padding: 0 1.5rem;
56
+ color: #fff;
57
+ }
58
+ '''
59
+
60
+ bot_template = '''
61
+ <div class="chat-message bot">
62
+ <div class="avatar">
63
+ <img src="https://i.pinimg.com/originals/0c/67/5a/0c675a8e1061478d2b7b21b330093444.gif" style="max-height: 70px; max-width: 50px; border-radius: 50%; object-fit: cover;">
64
+ </div>
65
+ <div class="message">{{MSG}}</div>
66
+ </div>
67
+ '''
68
+
69
+
70
+ user_template = '''
71
+ <div class="chat-message user">
72
+ <div class="avatar">
73
+ <img src="https://th.bing.com/th/id/OIP.uDqZFTOXkEWF9PPDHLCntAHaHa?pid=ImgDet&rs=1" style="max-height: 80px; max-width: 50px; border-radius: 50%; object-fit: cover;">
74
+ </div>
75
+ <div class="message">{{MSG}}</div>
76
+ </div>
77
+ '''
78
+ ###################################################
79
+
80
+ chunk_size = 500
81
+ chunk_overlap = 50
82
+ persist_directory = os.environ.get('PERSIST_DIRECTORY')
83
+ print(persist_directory)
84
+ source_directory = os.environ.get('SOURCE_DIRECTORY', 'source_documents')
85
+ target_source_chunks= int(os.environ.get('TARGET_SOURCE_CHUNKS', 5))
86
+ embeddings_model_name = os.environ.get('EMBEDDINGS_MODEL_NAME')
87
+ model_type=os.environ.get('MODEL_TYPE')
88
+
89
+
90
+ from langchain_community.document_loaders import (
91
+ CSVLoader,
92
+ PyMuPDFLoader,
93
+ TextLoader)
94
+
95
+
96
+ # Map file extensions to document loaders and their arguments
97
+ LOADER_MAPPING = {
98
+ ".csv": (CSVLoader, {}),
99
+ ".pdf": (PyMuPDFLoader, {}),
100
+ ".txt": (TextLoader, {"encoding": "utf8"}),
101
+ }
102
+
103
+
104
+
105
+
106
+
107
+
108
def load_single_document(file_content: BytesIO, file_type: str) -> List[Document]:
    """Load one uploaded file into LangChain documents.

    The upload is written to a temporary file because the loaders in
    ``LOADER_MAPPING`` are constructed from a filesystem path.

    Args:
        file_content: In-memory bytes of the uploaded file.
        file_type: Type string of the upload; the text after the last ``/``
            becomes the extension (``"application/pdf"`` -> ``".pdf"``).
            A value without ``/`` is treated as the bare extension.

    Returns:
        The documents produced by the loader registered for the extension.

    Raises:
        ValueError: If no loader is registered for the derived extension.
    """
    # [-1] instead of [1]: a type string without "/" no longer raises IndexError.
    ext = "." + file_type.rsplit("/", 1)[-1]

    # Reject unsupported types before touching the filesystem so that no
    # temporary file is left behind.
    if ext not in LOADER_MAPPING:
        raise ValueError(f"Unsupported file extension '{ext}'")
    loader_class, loader_args = LOADER_MAPPING[ext]

    # delete=False: the file is closed here and reopened by path by the loader.
    with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as temp_file:
        temp_file.write(file_content.getvalue())
        temp_file_path = temp_file.name

    try:
        return loader_class(temp_file_path, **loader_args).load()
    finally:
        # Remove the temporary file even when the loader raises.
        os.remove(temp_file_path)
123
+
124
+
125
+
126
def load_uploaded_documents(uploaded_files, uploaded_files_type, ignored_files: Union[List[str], None] = None) -> List[Document]:
    """Load every uploaded file and return the combined documents.

    Files are loaded one after another in the calling process; no worker
    pool is created because nothing was ever dispatched to one.

    Args:
        uploaded_files: Sized sequence of uploads; each must provide ``read()``.
        uploaded_files_type: Type string of each upload, in the same order.
        ignored_files: Accepted for backward compatibility; not consulted.

    Returns:
        A flat list holding the documents of all uploads, in upload order.
    """
    results = []
    with tqdm(total=len(uploaded_files), desc='Loading new documents', ncols=80) as progress_bar:
        for uploaded_file, file_type in zip(uploaded_files, uploaded_files_type):
            file_content = BytesIO(uploaded_file.read())
            results.extend(load_single_document(file_content, file_type))
            progress_bar.update()
    return results
137
+
138
+
139
def get_pdf_text(uploaded_files):
    """Return the documents loaded from all *uploaded_files*.

    Each upload's ``type`` attribute is forwarded alongside the upload itself
    to ``load_uploaded_documents``; the ignore list passed on is empty.
    """
    files = list(uploaded_files)
    file_types = [upload.type for upload in uploaded_files]
    nothing_ignored = []  # Add files to ignore if needed
    return load_uploaded_documents(files, file_types, nothing_ignored)
146
+
147
+
148
+
149
+
150
def does_vectorstore_exist(persist_directory: str) -> bool:
    """Report whether a populated vectorstore is present on disk.

    The store counts as existing only when ``persist_directory`` contains an
    ``index`` sub-directory, both ``chroma-collections.parquet`` and
    ``chroma-embeddings.parquet``, and at least one ``.bin`` or ``.pkl``
    file inside ``index``.

    Args:
        persist_directory: Directory to inspect. ``None`` (for example an
            unset ``PERSIST_DIRECTORY``) is reported as "no store" instead
            of raising ``TypeError``.

    Returns:
        ``True`` if all of the files above are found, ``False`` otherwise.
    """
    if persist_directory is None:
        return False

    index_dir = os.path.join(persist_directory, 'index')
    if not os.path.exists(index_dir):
        return False

    required = ('chroma-collections.parquet', 'chroma-embeddings.parquet')
    if not all(os.path.exists(os.path.join(persist_directory, name)) for name in required):
        return False

    # At least one index file is needed in a working vectorstore.
    index_files = glob.glob(os.path.join(persist_directory, 'index/*.bin'))
    index_files += glob.glob(os.path.join(persist_directory, 'index/*.pkl'))
    if not index_files:
        return False

    print("Yes vectorstore exists")
    return True
163
+
164
+
165
+
166
def get_text_chunks(results, chunk_size, chunk_overlap):
    """Split *results* into chunks with a ``RecursiveCharacterTextSplitter``.

    ``chunk_size`` and ``chunk_overlap`` are passed straight to the splitter;
    the return value is whatever its ``split_documents`` call yields.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(results)
171
+
172
+
173
def get_vectorstore(results, embeddings_model_name, persist_directory, client_settings, chunk_size, chunk_overlap):
    """Chunk *results* and store them in a persisted Chroma vectorstore.

    When ``does_vectorstore_exist`` reports a store in ``persist_directory``
    the chunks are appended to it; otherwise a new store is created from them.

    Args:
        results: Documents to chunk and index.
        embeddings_model_name: ``"openai"`` or ``"Cohereembeddings"``.
        persist_directory: Directory of the persisted store.
        client_settings: Chroma client settings; when ``None`` the module-level
            ``CHROMA_SETTINGS`` is used.
        chunk_size: Passed to ``get_text_chunks``.
        chunk_overlap: Passed to ``get_text_chunks``.

    Returns:
        The ``Chroma`` instance holding the documents.

    Raises:
        ValueError: If ``embeddings_model_name`` is not one of the two
            supported names.
    """
    if embeddings_model_name == "openai":
        embeddings = OpenAIEmbeddings()
        print('OpenAI embeddings loaded')
    elif embeddings_model_name == "Cohereembeddings":
        embeddings = CohereEmbeddings()
        print('Cohere embeddings loaded')
    else:
        # Previously this fell through and failed later with UnboundLocalError.
        raise ValueError(
            f"Unsupported embeddings model '{embeddings_model_name}'. "
            "Please choose one of the following: openai, Cohereembeddings"
        )

    settings = client_settings if client_settings is not None else CHROMA_SETTINGS
    texts = get_text_chunks(results, chunk_size=chunk_size, chunk_overlap=chunk_overlap)

    if does_vectorstore_exist(persist_directory):
        # Update and store locally vectorstore
        print(f"Appending to existing vectorstore at {persist_directory}")
        db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=settings)
        if texts:
            db.add_documents(texts)
    else:
        # Create and store locally vectorstore
        print("Creating new vectorstore")
        print("Creating embeddings. May take some minutes...")
        # from_documents already inserts the chunks; adding them a second
        # time would store every chunk twice.
        db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=settings)

    return db
201
+
202
+
203
def get_conversation_chain(vectorstore, target_source_chunks, model_type):
    """Build a conversational retrieval chain over *vectorstore*.

    Args:
        vectorstore: Store providing ``as_retriever``.
        target_source_chunks: Passed to the retriever as ``k`` in
            ``search_kwargs``.
        model_type: Not consulted; the LLM is always the Ollama
            ``llama3:8b`` model.

    Returns:
        A ``ConversationalRetrievalChain`` with a ``ConversationBufferMemory``
        stored under the ``chat_history`` key.
    """
    doc_retriever = vectorstore.as_retriever(
        search_kwargs={"k": target_source_chunks},
    )

    # Prepare the LLM.
    chat_model = Ollama(model='llama3:8b')

    chat_memory = ConversationBufferMemory(
        return_messages=True,
        memory_key='chat_history',
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=doc_retriever,
        memory=chat_memory,
    )
232
+
233
+
234
+ st.set_page_config(page_title="Generate Insights",page_icon=":bar_chart:")
235
+
236
+
237
def handle_userinput(user_question):
    """Send *user_question* to the conversation chain and render the history.

    The chain's ``chat_history`` is saved in ``st.session_state`` and written
    out with alternating templates: even positions use ``user_template``, odd
    positions use ``bot_template``.

    Args:
        user_question: Text entered by the user.
    """
    import html  # stdlib; imported locally so the block is self-contained

    # The chain only exists after documents have been processed; calling
    # ``None`` would raise TypeError.
    if st.session_state.conversation is None:
        st.warning("Please upload and process your documents before asking a question.")
        return

    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    for i, message in enumerate(st.session_state.chat_history):
        template = user_template if i % 2 == 0 else bot_template
        # The text is rendered with unsafe_allow_html, so escape it to keep
        # user input and model output from being interpreted as markup.
        st.write(template.replace("{{MSG}}", html.escape(message.content)), unsafe_allow_html=True)
248
+
249
+
250
+
251
+
252
+
253
def add_logo(logo_path, width, height):
    """Read the image at *logo_path* and return a resized copy.

    Args:
        logo_path: Path of the image file.
        width: Target width passed to ``resize``.
        height: Target height passed to ``resize``.

    Returns:
        The image returned by ``resize((width, height))``.
    """
    # The context manager closes the source file once the copy is made.
    with Image.open(logo_path) as logo:
        return logo.resize((width, height))
258
+
259
+ st.markdown(f'<style>{css}</style>', unsafe_allow_html=True)
260
+ col1, col2,col3,col4,col5,col6 = st.columns(6)
261
+
262
+ with col5:
263
+ my_logo = add_logo(logo_path="CampusX.jfif", width=100, height=20)
264
+ st.image(my_logo)
265
+ with col6:
266
+ pg_logo=add_logo(logo_path="Q&A logo.jfif", width=60, height=40)
267
+ st.image(pg_logo)
268
+
269
+
270
+
271
+
272
+
273
def main():
    """Render the page: question box, chat history and the upload sidebar.

    Pressing "Process" loads the uploaded files, stores them in the
    vectorstore and saves a new conversation chain in ``st.session_state``.
    """
    load_dotenv()
    css2 = '''
    <style>
        [data-testid="stSidebar"]{
            min-width: 300px;
            max-width: 300px;
        }
    </style>
    '''
    st.markdown(css2, unsafe_allow_html=True)

    st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header(":blue Generate Insights :bar_chart:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        uploaded_files = st.file_uploader("Upload documents", type=["pdf", "xlsx",'csv'], accept_multiple_files=True)

        if st.button("Process"):
            # With accept_multiple_files=True an empty selection is an empty
            # list, so test truthiness rather than ``is not None``.
            if not uploaded_files:
                st.warning("Please upload at least one document before processing.")
                return

            with st.spinner("Processing"):
                try:
                    raw_text = get_pdf_text(uploaded_files=uploaded_files)
                except ValueError as exc:
                    # Raised for uploads without a registered loader.
                    st.error(str(exc))
                    return

                # get_vectorstore splits the documents itself, so they are
                # passed unsplit instead of being chunked twice.
                vectorstore = get_vectorstore(results=raw_text,embeddings_model_name=embeddings_model_name,persist_directory=persist_directory,client_settings=CHROMA_SETTINGS,chunk_size=chunk_size,chunk_overlap=chunk_overlap)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(vectorstore=vectorstore,target_source_chunks=target_source_chunks,model_type=model_type)
317
+
318
+
319
+ if __name__ == '__main__':
320
+ main()
321
+