Upload app.py
app.py
CHANGED
@@ -13,30 +13,71 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 
 
-print(f"Pyton version {sys.version}.")
-
 # Initialize the FAISS vector store
 vector_store = None
 
 # Sample PDF file
-sample_filenames = ["
-
-
+sample_filenames = ["User Guide.pdf",
+                    "Installation.pdf",
+                   ]
 
 desc = """
-
-
-
+<h2 style="text-align: center; color: #333;">This is a Demo of RAG (Retrieval-Augmented Generation)</h2>
+<p style="text-align: left; color: #555;">
+<b>RAG</b> is an approach that combines a retrieval system with a generative LLM to improve the accuracy and relevance of generated text.
 It works by first retrieving relevant documents from an external knowledge source (like PDF files) and then using an LLM to produce responses based on both the input query and the retrieved content.
 This method enhances factual correctness and allows the model to access up-to-date or domain-specific information without retraining.
+</p>
+<hr/>
+"""
 
-
-
+desc_pdf_upload = """
+<p style="text-align: left; color: #555;">
+Choose the PDF files and click the <b>Load and Index Documents</b> button below to upload and index the files. It may take some time depending on the size of the files.
+Once you see the message <i>"PDF(s) indexed successfully!"</i> in the <b>Indexing Status</b> box below, go to the <b>Chatbot</b> tab to ask any relevant questions.
+</p>
+"""
 
-
+desc_sample = """
+<p style="text-align: left; color: #555;">
+Alternatively, click the button below to load a <b>User Guide</b> and an <b>Installation Guide</b> for a smoke alarm device into the vector database. It may take a couple of minutes to process.
+Once you see the message <i>"PDF(s) indexed successfully!"</i> in the <b>Indexing Status</b> box below, go to the <b>Chatbot</b> tab to ask any relevant questions about the device.
+</p>
+"""
 
+gui_css = """
+.gradio-container {
+    font-family: 'Inter', sans-serif;
+    border-radius: 12px;
+    overflow: hidden;
+}
+.panel {
+    border-radius: 8px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.gr-button {
+    border-radius: 8px;
+    padding: 10px 20px;
+    font-weight: bold;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+    transition: all 0.2s ease-in-out;
+}
+.gr-button:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
+}
+.gr-textbox textarea {
+    border-radius: 8px;
+}
+.gr-slider {
+    padding: 10px 0;
+}
+.gr-tabitem {
+    padding: 20px;
+}
 """
 
+
 sample_button = "Load User Guide and Installation Guide documents"
 
 
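The desc block above describes the two RAG stages: retrieve, then generate. For orientation, those stages typically compile into a chain shaped like the following. This is a minimal sketch only: it assumes a Groq-backed LangChain chat model (the diff's model list suggests Groq), reuses the `template` and `format_docs` defined elsewhere in app.py, and the model name and k=4 are illustrative, not taken from the diff.

    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.runnables import RunnablePassthrough
    from langchain_groq import ChatGroq  # assumed backend; requires GROQ_API_KEY

    llm = ChatGroq(model="gemma2-9b-it")            # illustrative model choice
    prompt = ChatPromptTemplate.from_template(template)

    def format_docs(docs):
        # Concatenate retrieved chunks into one context string
        return "\n\n".join(doc.page_content for doc in docs)

    retriever = vector_store.as_retriever(search_kwargs={"k": 4})  # k is illustrative
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    print(rag_chain.invoke("How do I test the smoke alarm?"))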
@@ -63,33 +104,15 @@ Question: {question}
 Answer:
 """
 
-# Function to handle PDF upload and indexing
-def index_pdf(pdf):
-    global vector_store
-
-    # Load the PDF
-    loader = PyPDFLoader(pdf.name)
-    documents = loader.load()
-
-    # Split the documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    texts = text_splitter.split_documents(documents)
 
-
-
-
-    # Store the embeddings in the vector store
-    vector_store = FAISS.from_documents(texts, embeddings)
-
-    return "PDF(s) indexed successfully!"
-
-def load_sample_pdf():
+# Function to handle PDF upload and indexing
+def load_pdf(files):
     global vector_store
     documents = []
 
     # Load the PDFs
-    for file in
-        loader = PyPDFLoader(file)
+    for file in files:
+        loader = PyPDFLoader(file.name)
         documents.extend(loader.load())
         # print(f"{file} is processed!")
 
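In isolation, the per-file pipeline inside load_pdf is load, then split. A minimal sketch (the file name is illustrative; on older LangChain versions these imports live under langchain.document_loaders and langchain.text_splitter):

    from langchain_community.document_loaders import PyPDFLoader
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    loader = PyPDFLoader("User Guide.pdf")  # yields one Document per page
    pages = loader.load()

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)
    print(f"{len(pages)} pages -> {len(chunks)} chunks")

The 200-character overlap keeps text that straddles a chunk boundary retrievable from either neighboring chunk.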
@@ -98,7 +121,9 @@ def load_sample_pdf():
     texts = text_splitter.split_documents(documents)
 
     # Embed the chunks
-
+    # embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    embedding_model_name = "bert-base-uncased"
+    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, encode_kwargs={"normalize_embeddings": True})
 
     # Store the embeddings in the vector store
     vector_store = FAISS.from_documents(texts, embeddings)
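Once the store is built, retrieval can be sanity-checked against it directly. A minimal sketch with an illustrative query:

    # Uses the vector_store built above; FAISS returns (Document, score) pairs,
    # where a lower score means a closer match under the default L2 metric.
    results = vector_store.similarity_search_with_score("How often should the battery be replaced?", k=3)
    for doc, score in results:
        print(f"{score:.3f}  {doc.page_content[:80]}")

Note that bert-base-uncased is not trained for sentence-level similarity; the commented-out sentence-transformers/all-MiniLM-L6-v2 is the more conventional retrieval choice and may give noticeably better matches.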
@@ -112,7 +137,7 @@ def format_docs(docs):
 def generate_response(query, history, model, temperature, max_tokens, top_p, seed):
 
     if vector_store is None:
-        return "Please upload and index a PDF at the Indexing tab."
+        return "Please upload and index a PDF at the Indexing tab.", ""
 
     if seed == 0:
         seed = random.randint(1, 100000)
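The appended empty string makes the early-exit path match the function's normal return shape, since the Chatbot tab unpacks two values unconditionally:

    # Both paths now return (answer_text, retrieved_context):
    bot_message, retrieved_info = generate_response(query, history, model, temperature, max_tokens, top_p, seed)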
@@ -135,46 +160,147 @@ def generate_response(query, history, model, temperature, max_tokens, top_p, seed):
 
     return response, relevant_info
 
-additional_inputs = [
-    gr.Dropdown(choices=["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it"], value="gemma2-9b-it", label="Model"),
-    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
-    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
-    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
-    gr.Number(precision=0, value=0, label="Seed", info="A starting point to initiate generation, use 0 for random")
-]
 
 
-
-
+template = """
+You are a helpful AI assistant. Use the following context to answer the question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+"""
+
+
+
+# --- Gradio Interface using gr.Blocks() ---
+with gr.Blocks(theme=gr.themes.Soft(), css=gui_css) as demo:
     with gr.Tab("Indexing"):
-        gr.
-
-
-        # index_button = gr.Button("Index PDF")
-        # load_sample = gr.Button("Alternatively, Load and Index [Attention Is All You Need.pdf] as a Sample")
-        load_sample = gr.Button(sample_button)
-        index_output = gr.Textbox(label="Indexing Status")
-        # index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
-        load_sample.click(load_sample_pdf, inputs=None, outputs=index_output)
-
-    with gr.Tab("Chatbot"):
+        with gr.Row():
+            gr.Markdown(desc)
+
         with gr.Row():
             with gr.Column():
-                gr.
-
-
+                gr.Markdown(desc_pdf_upload)
+                pdf_files = gr.File(label="Upload PDF Document", file_types=[".pdf"], interactive=True, file_count="multiple")
+                load_button = gr.Button("Load and Index Documents", variant="secondary")
+
+            with gr.Column():
+                gr.Markdown(desc_sample)
+                sample_files = gr.File(
+                    label="Sample PDF Files",
+                    file_count="multiple",
+                    file_types=[".pdf"],
+                    value=sample_filenames,
+                    visible=True,
+                    interactive=False
+                )
+                sample_button = gr.Button(sample_button)
+
+        with gr.Row():
+            index_output = gr.Textbox(label="Indexing Status")
+        sample_button.click(load_pdf, inputs=sample_files, outputs=index_output)
+        load_button.click(load_pdf, inputs=pdf_files, outputs=index_output)
+
+    with gr.Tab("Chatbot"):
+        with gr.Row():
+            with gr.Column(scale=2):
+                # Chatbot component
+                chatbot = gr.Chatbot(
+                    show_label=False,
+                    show_share_button=False,
+                    show_copy_button=True,
+                    layout="panel",
+                    height=500,  # Set a fixed height for the chatbot
+                    avatar_images=(
+                        "https://placehold.co/60x60/FFD700/000000?text=U",  # User avatar
+                        "https://placehold.co/60x60/6366F1/FFFFFF?text=AI"  # Bot avatar
+                    )
+                )
+
+                # Message input textbox
+                msg = gr.Textbox(
+                    label="Your Message",
+                    placeholder="Type your message here...",
+                    show_copy_button=True,
+                    container=False  # Prevent it from being wrapped in a default container
+                )
+
+                with gr.Row():
+                    submit_btn = gr.Button("Send", variant="primary")
+                    clear_btn = gr.ClearButton()  # Will be configured below
+
+                gr.Examples(
                     examples=examples_questions,
-
+                    inputs=[msg],
+                    outputs=[msg],  # Update the message input with the example
+                    label="Quick Examples",
                     cache_examples=False,
                 )
-            # with gr.Column():
-            #     retrieve_button = gr.Button("Retrieve Relevant Info")
-            #     relevant_info = gr.Textbox(
-            #         label="Retrieved Information",
-            #         interactive=False,
-            #         lines=20,
-            #     )
 
+            with gr.Column(scale=1):
+                gr.Markdown("### LLM Settings")
+                model_name = gr.Dropdown(label="Model Name",
+                    choices=[
+                        "llama-3.3-70b-versatile",
+                        "llama-3.1-8b-instant",
+                        "llama3-70b-8192",
+                        "llama3-8b-8192",
+                        "mixtral-8x7b-32768",
+                        "gemma2-9b-it"
+                    ],
+                    value="llama-3.3-70b-versatile",
+                    interactive=True
+                )
+
+
+                temperature_slider = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.01, label="Temperature", interactive=True)
+                max_tokens_slider = gr.Slider(minimum=10, maximum=2000, value=500, step=10, label="Max Tokens", interactive=True)
+                top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.01, label="Top P", interactive=True)
+                seed_number = gr.Number(minimum=0, maximum=100000, value=0, step=1, label="Seed", precision=0, interactive=True)
+
+                gr.Markdown("### Retrieved Information")
+                # Textbox for relevant_info
+                relevant_info_textbox = gr.Textbox(
+                    label="Retrieved Information",
+                    interactive=False,  # Not editable by the user
+                    lines=20,
+                    show_copy_button=True,
+                    autoscroll=True,
+                    container=True  # Ensure it has a container for styling
+                )
+
+        # --- Event Handling ---
+        # This function acts as a wrapper to process inputs and distribute outputs
+        def process_chat_and_info(message, chat_history, model, temp, max_tok, top_p_val, seed_val):
+            # Call the generate_response function, which returns two values
+            bot_message, retrieved_info = generate_response(
+                message, chat_history, model, temp, max_tok, top_p_val, seed_val
+            )
+
+            # Update the chat history for the chatbot component
+            chat_history.append((message, bot_message))
 
-#
-
+            # Return values in the order of the outputs list
+            return chat_history, retrieved_info, ""  # Clear the message input after sending
+
+        # Bind `process_chat_and_info` to the submit event of the message textbox
+        msg.submit(
+            fn=process_chat_and_info,
+            inputs=[msg, chatbot, model_name, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
+            outputs=[chatbot, relevant_info_textbox, msg],  # Order matters here: chatbot, relevant_info, then msg
+            queue=False  # Set to True if you expect heavy load
+        )
+
+        # Bind `process_chat_and_info` to the click event of the send button
+        submit_btn.click(
+            fn=process_chat_and_info,
+            inputs=[msg, chatbot, model_name, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
+            outputs=[chatbot, relevant_info_textbox, msg],  # Order matters here
+            queue=False  # Set to True if you expect heavy load
+        )
+        # Configure the clear button to clear both the chatbot and the relevant_info_textbox
+        clear_btn.add([msg, chatbot, relevant_info_textbox])
+
+
+demo.launch(server_name="0.0.0.0", server_port=7860)
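Stripped of the RAG specifics, the submit wiring above reduces to the following shape. A minimal runnable sketch with a hypothetical echo responder, using the same tuple-style chat history as the diff (deprecated in newer Gradio releases in favor of type="messages"):

    import gradio as gr

    def respond(message, chat_history):
        # Hypothetical stand-in for process_chat_and_info
        chat_history.append((message, f"Echo: {message}"))
        return chat_history, ""  # update the chatbot, then clear the textbox

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox(placeholder="Type your message here...")
        msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])

    demo.launch()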