jchen8000 committed
Commit eed60c0 · verified · 1 Parent(s): 25b992d

Upload app.py

Files changed (1):
  1. app.py +194 -68
app.py CHANGED
@@ -13,30 +13,71 @@ from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
 
 
-print(f"Pyton version {sys.version}.")
-
 # Initialize the FAISS vector store
 vector_store = None
 
 # Sample PDF file
-sample_filenames = ["Installation.pdf",
-                    "User Guide.pdf",
-                   ]
+sample_filenames = ["User Guide.pdf",
+                    "Installation.pdf",
+                   ]
 
 desc = """
-### This is a Demo of Retrieval-Augmented Generation (RAG)
-
-**RAG** is an approach that combines retrieval-based and generative LLM models to improve the accuracy and relevance of generated text.
+<h2 style="text-align: center; color: #333;">This is a Demo of RAG (Retrieval-Augmented Generation)</h2>
+<p style="text-align: left; color: #555;">
+<b>RAG</b> is an approach that combines retrieval-based and generative LLM models to improve the accuracy and relevance of generated text.
 It works by first retrieving relevant documents from an external knowledge source (like PDF files) and then using a LLM model to produce responses based on both the input query and the retrieved content.
 This method enhances factual correctness and allows the model to access up-to-date or domain-specific information without retraining.
-
-Click the button below to load a **User Guide** and an **Installation Guide** for a smoke alarm device into the vector database. It could take a couple of minutes to process.
-Once you see the message *"PDF(s) indexed successfully!"*, go to the **Chatbot** tab to ask any relevant questions about the device.
-
-You can change the LLM models in the **Additional Inputs** at the bottom of the **Chatbot** tab, in case of certain model is out of date. You can also adjust the LLM parameters there.
-
+</p>
+<hr/>
+"""
+
+desc_pdf_upload = """
+<p style="text-align: left; color: #555;">
+Choose the PDF files and click the <b>Load and Index Documents</b> button below to upload and index the files. It could take some time depending on the size of the files.
+Once you see the message <i>"PDF(s) indexed successfully!"</i> in the <b>Indexing Status</b> box below, go to the <b>Chatbot</b> tab to ask any relevant questions.
+</p>
+"""
+
+desc_sample = """
+<p style="text-align: left; color: #555;">
+Alternatively, click the button below to load a <b>User Guide</b> and an <b>Installation Guide</b> for a smoke alarm device into the vector database. It could take a couple of minutes to process.
+Once you see the message <i>"PDF(s) indexed successfully!"</i> in the <b>Indexing Status</b> box below, go to the <b>Chatbot</b> tab to ask any relevant questions about the device.
+</p>
+"""
+
+gui_css = """
+.gradio-container {
+    font-family: 'Inter', sans-serif;
+    border-radius: 12px;
+    overflow: hidden;
+}
+.panel {
+    border-radius: 8px;
+    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+}
+.gr-button {
+    border-radius: 8px;
+    padding: 10px 20px;
+    font-weight: bold;
+    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+    transition: all 0.2s ease-in-out;
+}
+.gr-button:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15);
+}
+.gr-textbox textarea {
+    border-radius: 8px;
+}
+.gr-slider {
+    padding: 10px 0;
+}
+.gr-tabitem {
+    padding: 20px;
+}
 """
 
+
 sample_button = "Load User Guide and Installation Guide documents"
 
@@ -63,33 +104,15 @@ Question: {question}
 Answer:
 """
 
-# Function to handle PDF upload and indexing
-def index_pdf(pdf):
-    global vector_store
-
-    # Load the PDF
-    loader = PyPDFLoader(pdf.name)
-    documents = loader.load()
-
-    # Split the documents into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-    texts = text_splitter.split_documents(documents)
 
-    # Embed the chunks
-    embeddings = HuggingFaceEmbeddings(model_name="bert-base-uncased", encode_kwargs={"normalize_embeddings": True})
-
-    # Store the embeddings in the vector store
-    vector_store = FAISS.from_documents(texts, embeddings)
-
-    return "PDF(s) indexed successfully!"
-
-def load_sample_pdf():
+# Function to handle PDF upload and indexing
+def load_pdf(files):
     global vector_store
     documents = []
 
     # Load the PDFs
-    for file in sample_filenames:
-        loader = PyPDFLoader(file)
+    for file in files:
+        loader = PyPDFLoader(file.name)
         documents.extend(loader.load())
         # print(f"{file} is processed!")
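For reference, the unchanged splitter configuration that load_pdf relies on is RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200). A minimal sketch of what those parameters produce; the import path and the synthetic input are illustrative assumptions, not taken from app.py:

    from langchain_text_splitters import RecursiveCharacterTextSplitter

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_text("A" * 2500)  # stand-in for extracted PDF text
    print([len(c) for c in chunks])  # roughly [1000, 1000, 900]: consecutive chunks share ~200 characters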
 
@@ -98,7 +121,9 @@ def load_sample_pdf():
     texts = text_splitter.split_documents(documents)
 
     # Embed the chunks
-    embeddings = HuggingFaceEmbeddings(model_name="bert-base-uncased", encode_kwargs={"normalize_embeddings": True})
+    # embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
+    embedding_model_name = "bert-base-uncased"
+    embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name, encode_kwargs={"normalize_embeddings": True})
 
     # Store the embeddings in the vector store
     vector_store = FAISS.from_documents(texts, embeddings)
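The hunk above keeps bert-base-uncased as the embedding model and only leaves sentence-transformers/all-MiniLM-L6-v2 as a comment. BERT base is a masked-language model with no sentence-similarity training, so the commented-out MiniLM model is generally the stronger retrieval choice. A sketch of swapping it in, assuming the langchain-huggingface import path (older releases expose HuggingFaceEmbeddings under langchain_community.embeddings):

    from langchain_huggingface import HuggingFaceEmbeddings

    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        encode_kwargs={"normalize_embeddings": True},  # keep normalized vectors, as in the diff
    )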
@@ -112,7 +137,7 @@ def format_docs(docs):
 def generate_response(query, history, model, temperature, max_tokens, top_p, seed):
 
     if vector_store is None:
-        return "Please upload and index a PDF at the Indexing tab."
+        return "Please upload and index a PDF at the Indexing tab.", ""
 
     if seed == 0:
         seed = random.randint(1, 100000)
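generate_response now returns a (response, relevant_info) pair even on the early return, matching the two outputs wired up in the Chatbot tab below. Its body is outside this diff; the following is a hypothetical sketch of the retrieval chain it presumably builds, reusing only names that do appear in app.py (template, format_docs, vector_store, StrOutputParser, RunnablePassthrough). The ChatGroq import, the k value, and the parameter mapping are assumptions:

    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnablePassthrough
    from langchain_groq import ChatGroq  # assumed; the diff only shows Groq-hosted model names

    def build_chain(model, temperature, max_tokens):
        retriever = vector_store.as_retriever(search_kwargs={"k": 4})  # k=4 is an assumption
        prompt = ChatPromptTemplate.from_template(template)
        llm = ChatGroq(model=model, temperature=temperature, max_tokens=max_tokens)
        return (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )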
@@ -135,46 +160,147 @@ def generate_response(query, history, model, temperature, max_tokens, top_p, see
 
     return response, relevant_info
 
-additional_inputs = [
-    gr.Dropdown(choices=["llama-3.3-70b-versatile", "llama-3.1-8b-instant", "llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma2-9b-it"], value="gemma2-9b-it", label="Model"),
-    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Temperature", info="Controls diversity of the generated text. Lower is more deterministic, higher is more creative."),
-    gr.Slider(minimum=1, maximum=8000, step=1, value=8000, label="Max Tokens", info="The maximum number of tokens that the model can process in a single response.<br>Maximums: 8k for gemma 7b it, gemma2 9b it, llama 7b & 70b, 32k for mixtral 8x7b, 132k for llama 3.1."),
-    gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.5, label="Top P", info="A method of text generation where a model will only consider the most probable next tokens that make up the probability p."),
-    gr.Number(precision=0, value=0, label="Seed", info="A starting point to initiate generation, use 0 for random")
-]
 
+template = """
+You are a helpful AI assistant. Use the following context to answer the question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+"""
+
 
-# Create the Gradio interface
-with gr.Blocks(theme=gr.themes.Default()) as demo:
+# --- Gradio Interface using gr.Blocks() ---
+with gr.Blocks(theme=gr.themes.Soft(), css=gui_css) as demo:
     with gr.Tab("Indexing"):
-        gr.Markdown(desc)
-        # pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
-        # pdf_input = gr.Textbox(label="PDF File")
-        # index_button = gr.Button("Index PDF")
-        # load_sample = gr.Button("Alternatively, Load and Index [Attention Is All You Need.pdf] as a Sample")
-        load_sample = gr.Button(sample_button)
-        index_output = gr.Textbox(label="Indexing Status")
-        # index_button.click(index_pdf, inputs=pdf_input, outputs=index_output)
-        load_sample.click(load_sample_pdf, inputs=None, outputs=index_output)
-
-    with gr.Tab("Chatbot"):
+        with gr.Row():
+            gr.Markdown(desc)
+
         with gr.Row():
             with gr.Column():
-                gr.ChatInterface(
-                    fn=generate_response,
-                    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
+                gr.Markdown(desc_pdf_upload)
+                pdf_files = gr.File(label="Upload PDF Document", file_types=[".pdf"], interactive=True, file_count="multiple")
+                load_button = gr.Button("Load and Index Documents", variant="secondary")
+
+            with gr.Column():
+                gr.Markdown(desc_sample)
+                sample_files = gr.File(
+                    label="Sample PDF Files",
+                    file_count="multiple",
+                    file_types=[".pdf"],
+                    value=sample_filenames,
+                    visible=True,
+                    interactive=False
+                )
+                sample_button = gr.Button(sample_button)
+
+        with gr.Row():
+            index_output = gr.Textbox(label="Indexing Status")
+        sample_button.click(load_pdf, inputs=sample_files, outputs=index_output)
+        load_button.click(load_pdf, inputs=pdf_files, outputs=index_output)
+
+    with gr.Tab("Chatbot"):
+        with gr.Row():
+            with gr.Column(scale=2):
+                # Chatbot component
+                chatbot = gr.Chatbot(
+                    show_label=False,
+                    show_share_button=False,
+                    show_copy_button=True,
+                    layout="panel",
+                    height=500,  # Set a fixed height for the chatbot
+                    avatar_images=(
+                        "https://placehold.co/60x60/FFD700/000000?text=U",  # User avatar
+                        "https://placehold.co/60x60/6366F1/FFFFFF?text=AI"  # Bot avatar
+                    )
+                )
+
+                # Message input textbox
+                msg = gr.Textbox(
+                    label="Your Message",
+                    placeholder="Type your message here...",
+                    show_copy_button=True,
+                    container=False  # Prevent it from being wrapped in a default container
+                )
+
+                with gr.Row():
+                    submit_btn = gr.Button("Send", variant="primary")
+                    clear_btn = gr.ClearButton()  # Will be configured below
+
+                gr.Examples(
                     examples=examples_questions,
-                    additional_inputs=additional_inputs,
+                    inputs=[msg],
+                    outputs=[msg],  # Update the message input with the example
+                    label="Quick Examples",
                     cache_examples=False,
                 )
-            # with gr.Column():
-            #     retrieve_button = gr.Button("Retrieve Relevant Info")
-            #     relevant_info = gr.Textbox(
-            #         label="Retrieved Information",
-            #         interactive=False,
-            #         lines=20,
-            #     )
 
-
-# Launch the Gradio app
-demo.launch(share=True)
+            with gr.Column(scale=1):
+                gr.Markdown("### LLM Settings")
+                model_name = gr.Dropdown(label="Model Name",
+                    choices=[
+                        "llama-3.3-70b-versatile",
+                        "llama-3.1-8b-instant",
+                        "llama3-70b-8192",
+                        "llama3-8b-8192",
+                        "mixtral-8x7b-32768",
+                        "gemma2-9b-it"
+                    ],
+                    value="llama-3.3-70b-versatile",
+                    interactive=True
+                )
+
+                temperature_slider = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.01, label="Temperature", interactive=True)
+                max_tokens_slider = gr.Slider(minimum=10, maximum=2000, value=500, step=10, label="Max Tokens", interactive=True)
+                top_p_slider = gr.Slider(minimum=0, maximum=1, value=0.9, step=0.01, label="Top P", interactive=True)
+                seed_number = gr.Number(minimum=0, maximum=100000, value=0, step=1, label="Seed", precision=0, interactive=True)
+
+                gr.Markdown("### Retrieved Information")
+                # Textbox for relevant_info
+                relevant_info_textbox = gr.Textbox(
+                    label="Retrieved Information",
+                    interactive=False,  # Not editable by the user
+                    lines=20,
+                    show_copy_button=True,
+                    autoscroll=True,
+                    container=True  # Ensure it has a container for styling
+                )
 
+    # --- Event Handling ---
+    # This function acts as a wrapper to process inputs and distribute outputs
+    def process_chat_and_info(message, chat_history, model, temp, max_tok, top_p_val, seed_val):
+        # Call your generate_response function which returns two values
+        bot_message, retrieved_info = generate_response(
+            message, chat_history, model, temp, max_tok, top_p_val, seed_val
+        )
+
+        # Update the chat history for the chatbot component
+        chat_history.append((message, bot_message))
+
+        # Return values in the order of the outputs list
+        return chat_history, retrieved_info, ""  # Clear the message input after sending
+
+    # Bind the `process_chat_and_info` function to the submit event of the message textbox
+    msg.submit(
+        fn=process_chat_and_info,
+        inputs=[msg, chatbot, model_name, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
+        outputs=[chatbot, relevant_info_textbox, msg],  # Order matters here: chatbot, relevant_info, then msg
+        queue=False  # Set to True if you expect heavy load
+    )
+
+    # Bind the `process_chat_and_info` function to the click event of the send button
+    submit_btn.click(
+        fn=process_chat_and_info,
+        inputs=[msg, chatbot, model_name, temperature_slider, max_tokens_slider, top_p_slider, seed_number],
+        outputs=[chatbot, relevant_info_textbox, msg],  # Order matters here
+        queue=False  # Set to True if you expect heavy load
+    )
+    # Configure the clear button to clear both the chatbot and the relevant_info_textbox
+    clear_btn.add([msg, chatbot, relevant_info_textbox])
+
+
+demo.launch(server_name="0.0.0.0", server_port=7860)
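For context on the event wiring at the end of the new file: msg.submit and submit_btn.click both route through process_chat_and_info, which appends a (user, bot) tuple to the history, pushes the retrieved context to the side textbox, and clears the input. A standalone sketch of the same pattern, assuming a Gradio version that still accepts tuple-format chat history (which chat_history.append((message, bot_message)) implies):

    import gradio as gr

    def respond(message, history):
        reply = f"Echo: {message}"        # stand-in for generate_response
        history.append((message, reply))  # tuple-format history, as in the diff
        return history, ""                # the empty string clears the textbox

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox(placeholder="Type your message here...")
        send = gr.Button("Send")
        msg.submit(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])
        send.click(respond, inputs=[msg, chatbot], outputs=[chatbot, msg])

    demo.launch()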