vineeth N committed on
Commit 030bc4f · verified · 1 Parent(s): eaa6b6a

Update app.py

Files changed (1)
  1. app.py +277 -38
app.py CHANGED
@@ -1,38 +1,277 @@
- import streamlit as st
- from langchain_groq import ChatGroq
- from langchain.schema import HumanMessage, AIMessage
-
- GROQ_API_KEY='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'
- # Initialize the ChatGroq model
- llm = ChatGroq(temperature=0, model_name='llama-3.1-8b-instant', groq_api_key=GROQ_API_KEY)
-
-
- st.title("Botyy")
-
- # Initialize chat history
-
- if "messages" not in st.session_state:
-     st.session_state.messages = []
-
- # Display chat messages from history on app rerun
- for message in st.session_state.messages:
-     with st.chat_message(message["role"]):
-         st.markdown(message["content"])
-
- # React to user input
- if prompt := st.chat_input("What is your question?"):
-     # Display user message in chat message container
-     st.chat_message("user").markdown(prompt)
-
-     # Add user message to chat history
-     st.session_state.messages.append({"role": "user", "content": prompt})
-
-     # Generate AI response
-     response = llm([HumanMessage(content=prompt)])
-
-     # Display AI response in chat message container
-     with st.chat_message("assistant"):
-         st.markdown(response.content)
-
-     # Add AI response to chat history
-     st.session_state.messages.append({"role": "assistant", "content": response.content})
+ # import os
+ # from typing import List
+ # from dotenv import load_dotenv
+ # import chainlit as cl
+ # from langchain_community.embeddings import HuggingFaceEmbeddings
+ # from langchain_text_splitters import RecursiveCharacterTextSplitter
+ # from langchain_community.vectorstores import FAISS
+ # from langchain_community.document_loaders import PyPDFLoader
+ # from langchain.chains import RetrievalQA
+ # from langchain_groq import ChatGroq
+ # from langchain_huggingface import HuggingFaceEmbeddings
+
+ # # Load environment variables
+ # load_dotenv()
+
+ # # Initialize embedding model
+ # # embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+
+ # openai.api_key = os.getenv("OPENAI_API_KEY")
+
+ # # Initialize embedding model using OpenAI
+ # embeddings = OpenAIEmbeddings(openai_api_key=openai.api_key, model="text-embedding-3-small")
+
+
+ # # Initialize vector store
+ # vector_store = None
+
+ # # Store PDF file paths
+ # pdf_files = {}
+
+ # # Define the path for the FAISS index
+ # FAISS_INDEX_PATH = "faiss_index"
+
+ # def process_pdfs(directory: str) -> None:
+ #     """Process all PDFs in the given directory and add them to the vector store."""
+ #     global vector_store, pdf_files
+ #     documents = []
+
+ #     for filename in os.listdir(directory):
+ #         if filename.endswith(".pdf"):
+ #             file_path = os.path.join(directory, filename)
+ #             loader = PyPDFLoader(file_path)
+ #             documents.extend(loader.load())
+ #             pdf_files[filename] = file_path
+
+ #     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+ #     texts = text_splitter.split_documents(documents)
+
+ #     if os.path.exists(FAISS_INDEX_PATH):
+ #         vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
+ #         vector_store.add_documents(texts)
+ #     else:
+ #         vector_store = FAISS.from_documents(texts, embeddings)
+
+ #     # Save the updated vector store
+ #     vector_store.save_local(FAISS_INDEX_PATH)
+ # @cl.on_chat_start
+ # async def start():
+ #     """Initialize the chat session."""
+ #     await cl.Message(content="Welcome! Processing PDFs...").send()
+
+ #     # Process PDFs (replace with your PDF directory)
+ #     process_pdfs(r"C:\Users\sumes\OneDrive\Documents\pdf_docs")
+
+ #     await cl.Message(content="PDFs processed. You can now ask questions!").send()
+
+ # @cl.on_message
+ # async def main(message: cl.Message):
+ #     """Handle user messages and generate responses."""
+ #     if vector_store is None:
+ #         await cl.Message(content="Error: Vector store not initialized.").send()
+ #         return
+
+ #     query = message.content
+
+ #     retriever = vector_store.as_retriever(search_kwargs={"k": 1})
+
+ #     llm = OpenAI(openai_api_key=openai.api_key, model="gpt-4o-mini", temperature=0.4)
+
+ #     qa_chain = RetrievalQA.from_chain_type(
+ #         llm=llm,
+ #         chain_type="stuff",
+ #         retriever=retriever,
+ #         return_source_documents=True
+ #     )
+
+ #     result = qa_chain(query)
+ #     answer = result['result']
+ #     source_docs = result['source_documents']
+
+ #     await cl.Message(content=answer).send()
+
+ #     if source_docs:
+ #         sources_message = "Sources:\n"
+ #         for doc in source_docs:
+ #             file_name = os.path.basename(doc.metadata['source'])
+ #             if file_name in pdf_files:
+ #                 file_path = pdf_files[file_name]
+ #                 elements = [
+ #                     cl.Text(name=file_name, content=f"Source: {file_name}"),
+ #                     cl.File(name=file_name, path=file_path, display="inline")
+ #                 ]
+ #                 await cl.Message(content=f"Source: {file_name}", elements=elements).send()
+ #             else:
+ #                 sources_message += f"- {doc.metadata['source']}\n"
+
+ #         if sources_message != "Sources:\n":
+ #             await cl.Message(content=sources_message).send()
+
+ # if __name__ == "__main__":
+ #     cl.run()
+
+ import os
+ from typing import List
+ from dotenv import load_dotenv
+ import chainlit as cl
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_community.vectorstores import FAISS
+ from langchain_community.document_loaders import PyPDFLoader
+ from langchain.chains import RetrievalQA
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
+
+ # Load environment variables
+ load_dotenv()
+
+ # Initialize OpenAI API key
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+
+ # Initialize embedding model using OpenAI
+ embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-small")
+
+ # Initialize vector store
+ vector_store = None
+
+ # Store PDF file paths
+ pdf_files = {}
+
+ # Define the path for the FAISS index
+ FAISS_INDEX_PATH = "faiss_index"
+ FAISS_INDEX_FILE = os.path.join(FAISS_INDEX_PATH, "index.faiss")
+
+ def process_pdfs(directory: str) -> None:
+     """Process all PDFs in the given directory and add them to the vector store."""
+     global vector_store, pdf_files
+     documents = []
+
+     for filename in os.listdir(directory):
+         if filename.endswith(".pdf"):
+             file_path = os.path.join(directory, filename)
+             loader = PyPDFLoader(file_path)
+             documents.extend(loader.load())
+             pdf_files[filename] = file_path
+
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+     texts = text_splitter.split_documents(documents)
+
+     if os.path.exists(FAISS_INDEX_FILE):
+         try:
+             vector_store = FAISS.load_local(FAISS_INDEX_PATH, embeddings, allow_dangerous_deserialization=True)
+             vector_store.add_documents(texts)
+         except Exception as e:
+             print(f"Error loading FAISS index: {e}")
+             vector_store = FAISS.from_documents(texts, embeddings)
+     else:
+         vector_store = FAISS.from_documents(texts, embeddings)
+
+     # Save the updated vector store
+     if not os.path.exists(FAISS_INDEX_PATH):
+         os.makedirs(FAISS_INDEX_PATH)
+     vector_store.save_local(FAISS_INDEX_PATH)
+
+ @cl.on_chat_start
+ async def start():
+     """Initialize the chat session."""
+     await cl.Message(content="Welcome! Processing PDFs...").send()
+
+     # Process PDFs (replace with your PDF directory)
+     process_pdfs(r"C:\Users\sumes\OneDrive\Documents\pdf_docs")
+
+     await cl.Message(content="PDFs processed. You can now ask questions!").send()
+
+ # @cl.on_message
+ # async def main(message: cl.Message):
+ #     """Handle user messages and generate responses."""
+ #     if vector_store is None:
+ #         await cl.Message(content="Error: Vector store not initialized.").send()
+ #         return
+
+ #     query = message.content
+
+ #     retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+
+ #     # Initialize the OpenAI language model
+ #     llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)
+
+ #     qa_chain = RetrievalQA.from_chain_type(
+ #         llm=llm,
+ #         chain_type="stuff",
+ #         retriever=retriever,
+ #         return_source_documents=True
+ #     )
+
+ #     result = qa_chain(query)
+ #     answer = result['result']
+ #     source_docs = result['source_documents']
+
+ #     await cl.Message(content=answer).send()
+
+ #     if source_docs:
+ #         sources_message = "Sources:\n"
+ #         for doc in source_docs:
+ #             file_name = os.path.basename(doc.metadata['source'])
+ #             if file_name in pdf_files:
+ #                 file_path = pdf_files[file_name]
+ #                 elements = [
+ #                     cl.Text(name=file_name, content=f"Source: {file_name}"),
+ #                     cl.File(name=file_name, path=file_path, display="inline")
+ #                 ]
+ #                 await cl.Message(content=f"Source: {file_name}", elements=elements).send()
+ #             else:
+ #                 sources_message += f"- {doc.metadata['source']}\n"
+
+ #         if sources_message != "Sources:\n":
+ #             await cl.Message(content=sources_message).send()
+
+
+
+ @cl.on_message
+ async def main(message: cl.Message):
+     """Handle user messages and generate responses."""
+     if vector_store is None:
+         await cl.Message(content="Error: Vector store not initialized.").send()
+         return
+
+     query = message.content
+
+     retriever = vector_store.as_retriever(search_kwargs={"k": 3})
+
+     # Initialize the OpenAI language model
+     llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0)
+
+     qa_chain = RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         return_source_documents=True
+     )
+
+     result = qa_chain.invoke({"query": query})
+     answer = result['result']
+     source_docs = result['source_documents']
+
+     await cl.Message(content=answer).send()
+
+     if source_docs:
+         unique_sources = set()
+         for doc in source_docs:
+             file_name = os.path.basename(doc.metadata['source'])
+             if file_name in pdf_files and file_name not in unique_sources:
+                 unique_sources.add(file_name)
+                 file_path = pdf_files[file_name]
+                 elements = [
+                     cl.Text(name=file_name, content=f"Source: {file_name}"),
+                     cl.File(name=file_name, path=file_path, display="inline")
+                 ]
+                 await cl.Message(content=f"Source: {file_name}", elements=elements).send()
+
+         other_sources = [doc.metadata['source'] for doc in source_docs if os.path.basename(doc.metadata['source']) not in pdf_files]
+         unique_other_sources = set(other_sources)
+         if unique_other_sources:
+             sources_message = "Other Sources:\n" + "\n".join(f"- {source}" for source in unique_other_sources)
+             await cl.Message(content=sources_message).send()
+
+ # Launch with the Chainlit CLI: `chainlit run app.py`
+ # (the chainlit package does not expose a `cl.run()` entry point).
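
Since the updated app persists its FAISS index to faiss_index/, the retrieval pipeline can be sanity-checked outside Chainlit by loading the saved index and running the same RetrievalQA chain directly. The sketch below is illustrative and not part of the commit: it assumes faiss_index/ was already built by process_pdfs() and that OPENAI_API_KEY is set (via .env or the shell); the file name query_index.py and the sample question are hypothetical placeholders.

# query_index.py — hedged sketch; reuses the models and index path from app.py
import os

from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

# Same embedding model and index path as app.py
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key, model="text-embedding-3-small")
vector_store = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)

qa_chain = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4o-mini", temperature=0),
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
)

result = qa_chain.invoke({"query": "What do these PDFs cover?"})  # hypothetical question
print(result["result"])
for doc in result["source_documents"]:
    print("-", doc.metadata["source"])

The app itself is served with the Chainlit CLI (chainlit run app.py) rather than by executing the module directly.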