karthikeyan-r committed
Commit
0b0bcd2
0 Parent(s):

Duplicate from karthikeyan-adople/Multi-URL-Doc-Chatbot

Files changed (7)
  1. .gitattributes +36 -0
  2. README.md +13 -0
  3. app.py +202 -0
  4. bg.png +3 -0
  5. logo.png +0 -0
  6. requirements.txt +16 -0
  7. style.css +41 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ bg.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Multi URL Doc Chatbot
+ emoji: 🏃
+ colorFrom: green
+ colorTo: blue
+ sdk: gradio
+ sdk_version: 3.38.0
+ app_file: app.py
+ pinned: false
+ duplicated_from: karthikeyan-adople/Multi-URL-Doc-Chatbot
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,202 @@
+ from pydantic import NoneStr
+ import os
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain.document_loaders import UnstructuredFileLoader
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.llms import OpenAI
+ from langchain.text_splitter import CharacterTextSplitter
+ from langchain.vectorstores import FAISS
+ from langchain.vectorstores import Chroma
+ from langchain.chains import ConversationalRetrievalChain
+ import gradio as gr
+ import openai
+ from langchain import PromptTemplate, OpenAI, LLMChain
+ import validators
+ import requests
+ import mimetypes
+ import tempfile
+
+ class Chatbot:
+     def __init__(self):
+         openai.api_key = os.getenv("OPENAI_API_KEY")
+     def get_empty_state(self):
+
+         """Create an empty knowledge base."""
+
+         return {"knowledge_base": None}
+
+     def create_knowledge_base(self, docs):
+
+         """Create a knowledge base from the given documents.
+         Args:
+             docs (list): List of loaded documents.
+         Returns:
+             Chroma: Knowledge base built from the documents.
+         """
+
+         # Initialize a CharacterTextSplitter to split the documents into chunks
+         # Each chunk has a maximum length of 1000 characters
+         # Consecutive chunks overlap by 200 characters
+         text_splitter = CharacterTextSplitter(
+             separator="\n", chunk_size=1000, chunk_overlap=200, length_function=len
+         )
+
+         # Split the documents into chunks using the text_splitter
+         chunks = text_splitter.split_documents(docs)
+
+         # Initialize an OpenAIEmbeddings model to compute embeddings of the chunks
+         embeddings = OpenAIEmbeddings()
+
+         # Build a knowledge base using Chroma from the chunks and their embeddings
+         knowledge_base = Chroma.from_documents(chunks, embeddings)
+
+         # Return the resulting knowledge base
+         return knowledge_base
+
+
+     def upload_file(self, file_paths):
+         """Upload files and create a knowledge base from their contents.
+         Args:
+             file_paths: The files to be uploaded.
+         Returns:
+             tuple: A tuple containing the file paths and the knowledge base.
+         """
+
+         file_paths = [i.name for i in file_paths]
+         print(file_paths)
+
+
+         loaders = [UnstructuredFileLoader(file_obj, strategy="fast") for file_obj in file_paths]
+
+         # Load the contents of the files using the loaders
+         docs = []
+         for loader in loaders:
+             docs.extend(loader.load())
+
+         # Create a knowledge base from the loaded documents using the create_knowledge_base() method
+         knowledge_base = self.create_knowledge_base(docs)
+
+
+         # Return a tuple containing the file paths and the knowledge base
+         return file_paths, {"knowledge_base": knowledge_base}
+
+     def add_text(self, history, text):
+         # Append the user's message to the chat history and lock the textbox until the answer arrives
+         history = history + [(text, None)]
+         print("History for Add text : ", history)
+         return history, gr.update(value="", interactive=False)
+
+
+     def upload_multiple_urls(self, urls):
+         """Download one or more comma-separated URLs and build a knowledge base from them."""
+         urlss = [url.strip() for url in urls.split(',')]
+         all_docs = []
+         file_paths = []
+         for url in urlss:
+             if validators.url(url):
+                 headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',}
+                 r = requests.get(url, headers=headers)
+                 if r.status_code != 200:
+                     raise ValueError("Check the url of your file; returned status code %s" % r.status_code)
+                 content_type = r.headers.get("content-type")
+                 file_extension = mimetypes.guess_extension(content_type)
+                 temp_file = tempfile.NamedTemporaryFile(suffix=file_extension, delete=False)
+                 temp_file.write(r.content)
+                 file_path = temp_file.name
+                 file_paths.append(file_path)
+
+         loaders = [UnstructuredFileLoader(file_obj, strategy="fast") for file_obj in file_paths]
+
+         # Load the contents of the files using the loaders
+         docs = []
+         for loader in loaders:
+             docs.extend(loader.load())
+
+         # Create a knowledge base from the loaded documents using the create_knowledge_base() method
+         knowledge_base = self.create_knowledge_base(docs)
+
+         return file_paths, {"knowledge_base": knowledge_base}
+
+     def answer_question(self, question, history, state):
+         """Answer the latest question in the chat history using the current knowledge base.
+         Args:
+             question (str): Unused; the question is taken from the last history entry.
+             history (list): Chat history as (question, answer) pairs.
+             state (dict): The current state containing the knowledge base.
+         Returns:
+             list: The updated chat history with the answer filled in.
+         """
+
+         # Retrieve the knowledge base from the state dictionary
+         knowledge_base = state["knowledge_base"]
+         retriever = knowledge_base.as_retriever()
+         qa = ConversationalRetrievalChain.from_llm(
+             llm=OpenAI(temperature=0.1),
+             retriever=retriever,
+             return_source_documents=False)
+         # Collect the previous (question, answer) turns to pass to the chain as chat history
+         res = []
+         question = history[-1][0]
+         for human, ai in history[:-1]:
+             pair = (human, ai)
+             res.append(pair)
+
+         chat_history = res
+
+         query = question
+         result = qa({"question": query, "chat_history": chat_history})
+         # Extract the answer produced by the conversational retrieval chain
+         response = result["answer"]
+         # Store the response as the answer to the latest question
+         history[-1][1] = response
+         print("History for QA : ", history)
+         return history
+
+
+     def clear_function(self, state):
+         # Reset the stored knowledge base when the chat is cleared
+         state.clear()
+         # state = gr.State(self.get_empty_state())
+
+     def gradio_interface(self):
+
+         """Create the Gradio interface for the chatbot."""
+
+         with gr.Blocks(css="style.css", theme='karthikeyan-adople/hudsonhayes-gray') as demo:
+             gr.HTML("""<center class="darkblue" style='background-color:rgb(0,1,36); text-align:center;padding:25px;'>
+                 <center>
+                 <h1 class="center">
+                 <img src="file=logo.png" height="110px" width="280px">
+                 </h1>
+                 </center>
+                 <br>
+                 <h1 style="color:#fff">
+                 Virtual Assistant Chatbot
+                 </h1>
+                 </center>""")
+             state = gr.State(self.get_empty_state())
+             with gr.Column(elem_id="col-container"):
+                 with gr.Accordion("Upload Files", open=False):
+                     with gr.Row(elem_id="row-flex"):
+                         with gr.Row(elem_id="row-flex"):
+                             with gr.Column(scale=1,):
+                                 file_url = gr.Textbox(label='file url :', show_label=True, placeholder="")
+                         with gr.Row(elem_id="row-flex"):
+                             with gr.Column(scale=1):
+                                 file_output = gr.File()
+                             with gr.Column(scale=1):
+                                 upload_button = gr.UploadButton("Browse File", file_types=[".txt", ".pdf", ".doc", ".docx"], file_count="multiple")
+                 with gr.Row():
+                     chatbot = gr.Chatbot([], elem_id="chatbot")
+                 with gr.Row():
+                     txt = gr.Textbox(label="Question", show_label=True, placeholder="Enter text and press Enter")
+                 with gr.Row():
+                     clear_btn = gr.Button(value="Clear")
+
+             txt_msg = txt.submit(self.add_text, [chatbot, txt], [chatbot, txt], queue=False).then(self.answer_question, [txt, chatbot, state], chatbot)
+             txt_msg.then(lambda: gr.update(interactive=True), None, [txt], queue=False)
+             file_url.submit(self.upload_multiple_urls, file_url, [file_output, state])
+             clear_btn.click(self.clear_function, [state], [])
+             clear_btn.click(lambda: None, None, chatbot, queue=False)
+             upload_button.upload(self.upload_file, upload_button, [file_output, state])
+         demo.queue().launch(debug=True)
+
+ if __name__ == "__main__":
+     chatbot = Chatbot()
+     chatbot.gradio_interface()
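
As a quick orientation to app.py above, here is a minimal, hypothetical sketch of exercising the Chatbot class directly in Python, without launching the Gradio UI. It is not part of this commit: it assumes OPENAI_API_KEY is already set in the environment, and the URL and question are placeholders.

# Hypothetical usage sketch (not part of the commit): drive the Chatbot class from app.py
# programmatically. Assumes OPENAI_API_KEY is set and the URL points to a reachable
# .pdf/.txt/.docx document.
from app import Chatbot

bot = Chatbot()

# Build a Chroma knowledge base from one or more comma-separated document URLs.
file_paths, state = bot.upload_multiple_urls("https://example.com/report.pdf")

# answer_question() reads the question from the last history entry and writes
# the chain's answer back into that entry.
history = [["What is this document about?", None]]
history = bot.answer_question(None, history, state)
print(history[-1][1])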
bg.png ADDED

Git LFS Details

  • SHA256: 4297a3e1f891519bb5a8439ce713dcb2aeae63c290b4665b32f23729206ba123
  • Pointer size: 132 Bytes
  • Size of remote file: 1.15 MB
logo.png ADDED
requirements.txt ADDED
@@ -0,0 +1,16 @@
+ openai
+ tiktoken
+ chromadb
+ langchain
+ gradio
+ pypdf
+ requests
+ unstructured
+ validators
+ pytesseract
+ pdf2image
+ tabulate
+ nltk
+ python-dotenv
+ faiss-cpu
+ requests
style.css ADDED
@@ -0,0 +1,41 @@
+ #col-container {
+   max-width: 1000px;
+   margin-left: auto;
+   margin-right: auto;
+ }
+ .heightfit{
+   height:120px;
+ }
+ gradio-app{
+   background:url("file=bg.png") !important;
+ }
+
+ #row-flex {
+   display: flex;
+   align-items: center;
+   justify-content: center;
+ }
+ .leftimage, .rightimage{
+   float:left;
+ }
+ .leftimage{
+   padding-top:27px;
+   margin-left:210px;
+ }
+ .rightimage{
+   margin-right:210px;
+   margin-top:15px;
+ }
+ a,
+ a:hover,
+ a:visited {
+   text-decoration-line: underline;
+   font-weight: 600;
+   color: #1f2937 !important;
+ }
+
+ .dark a,
+ .dark a:hover,
+ .dark a:visited {
+   color: #f3f4f6 !important;
+ }