KarthickAdopleAI committed on
Commit 5d899f8 · verified · 1 Parent(s): 43f8021

Create app.py

Files changed (1)
  1. app.py +323 -0
app.py ADDED
@@ -0,0 +1,323 @@
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.chat_models import ChatOpenAI
+ from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationChain
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain.document_loaders import UnstructuredFileLoader
+ from typing import List, Dict, Tuple
+ import gradio as gr
+ import validators
+ import requests
+ import mimetypes
+ import tempfile
+ import os
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.llms import OpenAI
+ from langchain.prompts import PromptTemplate
+ import pandas as pd
+ from langchain_experimental.agents.agent_toolkits import create_csv_agent
+ from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
+ from langchain.agents import ZeroShotAgent, Tool, AgentExecutor
+ from langchain.agents.agent_types import AgentType
+ # from langchain.agents import create_csv_agent
+ from langchain import OpenAI, LLMChain
+ from openai import AzureOpenAI
+
+
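+ # Gradio chatbot that answers questions over uploaded documents, URLs, and CSV files,
+ # using Azure OpenAI chat and embedding deployments with a FAISS vector store.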
+ class ChatDocumentQA:
+     def __init__(self) -> None:
+         pass
+
+     def _get_empty_state(self) -> Dict[str, None]:
+         """Create an empty knowledge base."""
+         return {"knowledge_base": None}
+
+     def _extract_text_from_pdfs(self, file_paths: List[str]) -> List:
+         """Extract text content from PDF files.
+
+         Args:
+             file_paths (List[str]): List of file paths.
+
+         Returns:
+             List: Documents extracted from the files.
+         """
+         docs = []
+         loaders = [UnstructuredFileLoader(file_obj, strategy="fast") for file_obj in file_paths]
+         for loader in loaders:
+             docs.extend(loader.load())
+         return docs
+
+     def _get_content_from_url(self, urls: str) -> List:
+         """Fetch content from given URLs.
+
+         Args:
+             urls (str): Comma-separated URLs.
+
+         Returns:
+             List: Documents extracted from the downloaded files.
+         """
+         file_paths = []
+         for url in urls.split(','):
+             if validators.url(url):
+                 headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'}
+                 r = requests.get(url, headers=headers)
+                 if r.status_code != 200:
+                     raise ValueError("Check the url of your file; returned status code %s" % r.status_code)
+                 content_type = r.headers.get("content-type")
+                 file_extension = mimetypes.guess_extension(content_type)
+                 temp_file = tempfile.NamedTemporaryFile(suffix=file_extension, delete=False)
+                 temp_file.write(r.content)
+                 temp_file.close()  # flush to disk so the loader can read the full file
+                 file_paths.append(temp_file.name)
+
+         print("File_Paths:", file_paths)
+         docs = self._extract_text_from_pdfs(file_paths)
+         return docs
+
+     def _split_text_into_chunks(self, text: List) -> List:
+         """Split documents into smaller chunks.
+
+         Args:
+             text (List): Documents to be split.
+
+         Returns:
+             List: List of smaller document chunks.
+         """
+         text_splitter = CharacterTextSplitter(separator="\n", chunk_size=6000, chunk_overlap=0, length_function=len)
+
+         chunks = text_splitter.split_documents(text)
+
+         return chunks
+
+     def _create_vector_store_from_text_chunks(self, text_chunks: List) -> FAISS:
+         """Create a vector store from text chunks.
+
+         Args:
+             text_chunks (List): List of text chunks.
+
+         Returns:
+             FAISS: Vector store created from the text chunks.
+         """
+         embeddings = AzureOpenAIEmbeddings(
+             azure_deployment="text-embedding-3-large",
+         )
+
+         return FAISS.from_documents(documents=text_chunks, embedding=embeddings)
+
+
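+     # Builds the conversational retrieval chain: the condense-question prompt rewrites a
+     # follow-up into a standalone question, then the FAISS retriever feeds context to the LLM.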
+     def _create_conversation_chain(self, vectorstore):
+
+         _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
+
+ Chat History: {chat_history}
+ Follow Up Input: {question}
+ Standalone question:"""
+         CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
+
+         memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+         # llm = ChatOpenAI(temperature=0)
+         llm = AzureChatOpenAI(azure_deployment="GPT-4o")
+
+         return ConversationalRetrievalChain.from_llm(llm=llm, retriever=vectorstore.as_retriever(),
+                                                      condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+                                                      memory=memory)
+
+     def _get_documents_knowledge_base(self, file_paths: List[str]) -> Tuple[str, Dict[str, FAISS]]:
+         """Build knowledge base from uploaded files.
+
+         Args:
+             file_paths (List[str]): List of file paths.
+
+         Returns:
+             Tuple[str, Dict]: Tuple containing a status message and the knowledge base.
+         """
+         file_path = file_paths[0].name
+         file_extension = os.path.splitext(file_path)[1]
+
+         if file_extension == '.csv':
+             # agent = self.create_agent(file_path)
+             # tools = self.get_agent_tools(agent)
+             # memory,tools,prompt = self.create_memory_for_csv_qa(tools)
+             # agent_chain = self.create_agent_chain_for_csv_qa(memory,tools,prompt)
+             agent_chain = create_csv_agent(
+                 AzureChatOpenAI(azure_deployment="GPT-4o"),
+                 file_path,
+                 verbose=True,
+                 agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+             )
+             return "file uploaded", {"knowledge_base": agent_chain}
+
+         else:
+             pdf_docs = [file_obj.name for file_obj in file_paths]
+             raw_text = self._extract_text_from_pdfs(pdf_docs)
+             text_chunks = self._split_text_into_chunks(raw_text)
+             vectorstore = self._create_vector_store_from_text_chunks(text_chunks)
+             return "file uploaded", {"knowledge_base": vectorstore}
+
+
+     def _get_urls_knowledge_base(self, urls: str) -> Tuple[str, Dict[str, FAISS]]:
+         """Build knowledge base from URLs.
+
+         Args:
+             urls (str): Comma-separated URLs.
+
+         Returns:
+             Tuple[str, Dict]: Tuple containing a status message and the knowledge base.
+         """
+         webpage_text = self._get_content_from_url(urls)
+         text_chunks = self._split_text_into_chunks(webpage_text)
+         vectorstore = self._create_vector_store_from_text_chunks(text_chunks)
+         return "file uploaded", {"knowledge_base": vectorstore}
+
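+     # The helpers below implement an alternative CSV QA path (a custom ZeroShotAgent with
+     # conversation memory); _get_documents_knowledge_base currently calls create_csv_agent
+     # directly and only references these helpers via the commented-out calls above.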
+     #************************
+     # csv qa
+     #************************
+     def create_agent(self, file_path):
+         agent_chain = create_csv_agent(
+             AzureChatOpenAI(azure_deployment="GPT-4o"),
+             file_path,
+             verbose=True,
+             agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+         )
+         return agent_chain
+
+     def get_agent_tools(self, agent):
+         # search = agent
+         tools = [
+             Tool(
+                 name="dataframe qa",
+                 func=agent.run,
+                 description="useful for when you need to answer questions about table data and dataframe data",
+             )
+         ]
+         return tools
+
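+     # Builds the ZeroShotAgent prompt (prefix/suffix with a chat_history placeholder) and the
+     # conversation memory used by the agent executor below.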
+     def create_memory_for_csv_qa(self, tools):
+         prefix = """Have a conversation with a human, answering the following questions about table data and dataframe data as best you can. You have access to the following tools:"""
+         suffix = """Begin!"
+
+ {chat_history}
+ Question: {input}
+ {agent_scratchpad}"""
+
+         prompt = ZeroShotAgent.create_prompt(
+             tools,
+             prefix=prefix,
+             suffix=suffix,
+             input_variables=["input", "chat_history", "agent_scratchpad"],
+         )
+         memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+
+         return memory, tools, prompt
+
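+     # Assembles the LLMChain, ZeroShotAgent, and AgentExecutor for the memory-backed CSV QA path.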
+     def create_agent_chain_for_csv_qa(self, memory, tools, prompt):
+
+         llm_chain = LLMChain(llm=AzureChatOpenAI(azure_deployment="GPT-4o"), prompt=prompt)
+         agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
+         agent_chain = AgentExecutor.from_agent_and_tools(
+             agent=agent, tools=tools, verbose=True, memory=memory
+         )
+
+         return agent_chain
+
+     def _get_response(self, message: str, chat_history: List[Tuple[str, str]], state: Dict[str, FAISS], file_paths) -> Tuple[str, List[Tuple[str, str]]]:
+         """Get a response from the chatbot.
+
+         Args:
+             message (str): User's message/question.
+             chat_history (List[Tuple[str, str]]): List of chat history as tuples of (user_message, bot_response).
+             state (dict): State containing the knowledge base.
+             file_paths: Files currently loaded via the upload button, if any.
+
+         Returns:
+             Tuple[str, List[Tuple[str, str]]]: Tuple containing a status message and updated chat history.
+         """
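+         # CSV uploads are answered by the CSV agent; other documents and URLs go through
+         # the conversational retrieval chain built over the FAISS vector store.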
+         try:
+             if file_paths:
+                 file_path = file_paths[0].name
+                 file_extension = os.path.splitext(file_path)[1]
+
+                 if file_extension == '.csv':
+                     agent_chain = state["knowledge_base"]
+                     response = agent_chain.run(input=message)
+                     chat_history.append((message, response))
+                     return "", chat_history
+
+                 else:
+                     vectorstore = state["knowledge_base"]
+                     chat = self._create_conversation_chain(vectorstore)
+                     response = chat({"question": message, "chat_history": chat_history})
+                     chat_history.append((message, response["answer"]))
+                     return "", chat_history
+             else:
+                 vectorstore = state["knowledge_base"]
+                 chat = self._create_conversation_chain(vectorstore)
+                 response = chat({"question": message, "chat_history": chat_history})
+                 chat_history.append((message, response["answer"]))
+                 return "", chat_history
+         except Exception:
+             chat_history.append((message, "Please upload a document or URL."))
+             return "", chat_history
+
+     def gradio_interface(self) -> None:
+         """Create a Gradio interface for the chatbot."""
+         with gr.Blocks(css="#textbox_id textarea {color: white}", theme='SherlockRamos/Feliz') as demo:
+             gr.HTML("""
+                 <style>
+                 .footer {
+                     display: none !important;
+                 }
+                 footer {
+                     display: none !important;
+                 }
+                 #foot {
+                     display: none !important;
+                 }
+                 .svelte-1fzp3xt {
+                     display: none !important;
+                 }
+                 #root > div > div > div {
+                     padding-bottom: 0 !important;
+                 }
+                 .custom-footer {
+                     text-align: center;
+                     padding: 10px;
+                     font-size: 14px;
+                     color: #333;
+                 }
+                 </style>
+             """)
+             gr.HTML("""<div><img src="https://www.broadridge.com/_assets/images/logos/br-pimary-blue-logo.svg" alt="Broadridge" style="float:left;width:200px;height:50px;"><h1 style="color:#000;margin-left:5in;padding-top:10px">Virtual Assistant Chatbot</h1></div>""")
+
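+             # Session state holds the knowledge base built from uploaded files or URLs.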
+             state = gr.State(self._get_empty_state())
+             chatbot = gr.Chatbot()
+
+             with gr.Row():
+                 with gr.Column(scale=0.85):
+                     msg = gr.Textbox(label="Question", elem_id="textbox_id")
+                 with gr.Column(scale=0.15):
+                     file_output = gr.Textbox(label="File Status")
+             with gr.Row():
+                 with gr.Column(scale=0.85):
+                     clear = gr.ClearButton([msg, chatbot])
+                 with gr.Column(scale=0.15):
+                     upload_button = gr.UploadButton(
+                         "Browse File",
+                         file_types=[".txt", ".pdf", ".doc", ".docx", ".csv"],
+                         file_count="multiple", variant="primary"
+                     )
+             with gr.Row():
+                 with gr.Column(scale=1):
+                     input_url = gr.Textbox(label="urls", elem_id="textbox_id")
+
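+             # Wire events: URL submit and file upload build the knowledge base;
+             # message submit routes the question to the appropriate chain.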
+             input_url.submit(self._get_urls_knowledge_base, input_url, [file_output, state])
+             upload_button.upload(self._get_documents_knowledge_base, upload_button, [file_output, state])
+             msg.submit(self._get_response, [msg, chatbot, state, upload_button], [msg, chatbot])
+
+         demo.launch(debug=True)
+
+
+ if __name__ == "__main__":
+     chatdocumentqa = ChatDocumentQA()
+     chatdocumentqa.gradio_interface()