Spaces:
Paused
Paused
Shreyas094
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -210,69 +210,103 @@ def google_search(term, num_results=5, lang="en", timeout=5, safe="active", ssl_
|
|
210 |
print(f"Result {i}:")
|
211 |
print(f" Link: {result['link']}")
|
212 |
if result['text']:
|
213 |
-
print(f" Text: {result['text'][:100]}...") #
|
214 |
else:
|
215 |
-
print("
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
return all_results
|
217 |
|
218 |
-
def
|
219 |
global conversation_history
|
220 |
|
221 |
-
|
|
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
similarity = get_similarity(question, prev_question)
|
226 |
-
if similarity > 0.8:
|
227 |
-
return prev_answer
|
228 |
-
|
229 |
-
# Retrieve relevant documents from the vector store
|
230 |
-
if os.path.exists("faiss_database"):
|
231 |
-
db = FAISS.load_local("faiss_database", embeddings, allow_dangerous_deserialization=True)
|
232 |
-
relevant_docs = db.similarity_search(question, k=3)
|
233 |
else:
|
234 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
235 |
|
236 |
-
|
237 |
-
if enable_web_search and len(relevant_docs) == 0:
|
238 |
-
web_search_results = google_search(question, num_results=5)
|
239 |
-
web_docs = [Document(page_content=res["text"] or "", metadata={"source": res["link"]}) for res in web_search_results if res["text"]]
|
240 |
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
|
|
246 |
|
247 |
-
|
|
|
248 |
|
249 |
-
|
250 |
-
|
251 |
-
else:
|
252 |
-
history_text = "\n".join([f"Q: {h['question']}\nA: {h['answer']}" for h in history]) if history else "None"
|
253 |
-
context = context if context else "None"
|
254 |
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
).format(history=history_text, context=context, question=question)
|
259 |
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
|
266 |
return answer
|
267 |
|
268 |
-
def
|
269 |
-
if
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
276 |
|
277 |
def extract_db_to_excel():
|
278 |
embed = get_embeddings()
|
@@ -303,43 +337,47 @@ def export_memory_db_to_excel():
|
|
303 |
|
304 |
return excel_path
|
305 |
|
|
|
306 |
with gr.Blocks() as demo:
|
307 |
-
|
308 |
-
pdf_file = gr.File(label="Upload PDF")
|
309 |
-
with gr.Row():
|
310 |
-
recursive_check = gr.Checkbox(label="Use Recursive Text Splitter")
|
311 |
-
upload_button = gr.Button("Upload and Process")
|
312 |
-
with gr.Row():
|
313 |
-
upload_output = gr.Textbox(label="Upload Output")
|
314 |
|
315 |
with gr.Row():
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
with gr.Row():
|
323 |
-
ask_button = gr.Button("Ask")
|
324 |
-
with gr.Row():
|
325 |
-
answer = gr.Textbox(label="Answer")
|
326 |
|
327 |
with gr.Row():
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
|
339 |
-
|
340 |
-
|
341 |
-
clear_button.click(clear_cache, [], clear_output)
|
342 |
-
export_db_button.click(extract_db_to_excel, [], export_db_output)
|
343 |
-
export_memory_button.click(export_memory_db_to_excel, [], export_memory_output)
|
344 |
|
345 |
-
|
|
|
|
210 |
print(f"Result {i}:")
|
211 |
print(f" Link: {result['link']}")
|
212 |
if result['text']:
|
213 |
+
print(f" Text: {result['text'][:100]}...") # Print first 100 characters
|
214 |
else:
|
215 |
+
print(" Text: None")
|
216 |
+
print("End of search results")
|
217 |
+
|
218 |
+
if not all_results:
|
219 |
+
print("No search results found. Returning a default message.")
|
220 |
+
return [{"link": None, "text": "No information found in the web search results."}]
|
221 |
+
|
222 |
return all_results
|
223 |
|
224 |
+
def ask_question(question, temperature, top_p, repetition_penalty, web_search):
    """Answer ``question`` from web search results or from the local FAISS store.

    Args:
        question: The user's question; an empty value short-circuits with a
            prompt to enter one.
        temperature: Forwarded to ``get_model``.
        top_p: Forwarded to ``get_model``.
        repetition_penalty: Forwarded to ``get_model``.
        web_search: When truthy, the context comes from ``google_search``
            instead of the persisted ``faiss_database`` index.

    Returns:
        The answer text, or a short status message when no question was given,
        no usable web result exists, or no FAISS database is present.

    Side effects:
        For non-web questions the answer is stored in ``memory_database`` and
        the global ``conversation_history`` is replaced with the value returned
        by ``manage_conversation_history``.
    """
    global conversation_history

    if not question:
        return "Please enter a question."

    # Cached answers are reused only for document questions; web answers are
    # always regenerated.
    if question in memory_database and not web_search:
        answer = memory_database[question]
    else:
        model = get_model(temperature, top_p, repetition_penalty)
        embed = get_embeddings()

        if web_search:
            search_results = google_search(question)

            # Convert web search results to Document format, dropping results
            # that carry no text.
            web_docs = [
                Document(page_content=result["text"], metadata={"source": result["link"]})
                for result in search_results
                if result["text"]
            ]

            if not web_docs:
                # A vector index cannot be built from zero documents, so stop
                # here instead of letting FAISS.from_documents fail.
                return "No information found in the web search results."

            # Create a temporary FAISS database for web search results
            temp_database = FAISS.from_documents(web_docs, embed)

            retriever = temp_database.as_retriever()
            relevant_docs = retriever.get_relevant_documents(question)
            context_str = "\n".join([doc.page_content for doc in relevant_docs])

            prompt_template = """
Answer the question based on the following web search results:
Web Search Results:
{context}
Current Question: {question}
If the web search results don't contain relevant information, state that the information is not available in the search results.
Provide a concise and direct answer to the question without mentioning the web search or these instructions:
"""
            prompt_val = ChatPromptTemplate.from_template(prompt_template)
            formatted_prompt = prompt_val.format(context=context_str, question=question)
        else:
            # Check if the FAISS database exists
            if not os.path.exists("faiss_database"):
                return "No FAISS database found. Please upload documents to create the vector store."

            # NOTE(review): allow_dangerous_deserialization=True means the index
            # is unpickled on load; this is only safe while "faiss_database" is
            # written exclusively by this application -- confirm.
            database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)

            history_str = "\n".join(
                [f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history]
            )

            if is_related_to_history(question, conversation_history):
                context_str = "No additional context needed. Please refer to the conversation history."
            else:
                retriever = database.as_retriever()
                relevant_docs = retriever.get_relevant_documents(question)
                context_str = "\n".join([doc.page_content for doc in relevant_docs])

            prompt_val = ChatPromptTemplate.from_template(prompt)
            formatted_prompt = prompt_val.format(history=history_str, context=context_str, question=question)

        answer = generate_chunked_response(model, formatted_prompt)
        # Keep only the text after the last echoed question marker.
        answer = re.split(r'Question:|Current Question:', answer)[-1].strip()

        # Remove any remaining prompt instructions from the answer
        answer = '\n'.join(
            line for line in answer.split('\n') if not line.startswith(('If', 'Provide'))
        )

        if not web_search:
            memory_database[question] = answer

    if not web_search:
        conversation_history = manage_conversation_history(question, answer, conversation_history)

    return answer
|
293 |
|
294 |
+
def update_vectors(files, use_recursive_splitter):
    """Split each uploaded file into chunks and add them to the vector store.

    Args:
        files: The uploaded files; an empty or missing value returns a prompt
            to upload at least one PDF.
        use_recursive_splitter: When truthy, files are split with
            ``load_and_split_document_recursive``; otherwise with
            ``load_and_split_document_basic``.

    Returns:
        A status message reporting how many chunks were processed from how
        many files.
    """
    if not files:
        return "Please upload at least one PDF file."

    embeddings = get_embeddings()

    # The splitter choice does not change between files, so resolve it once.
    split_document = (
        load_and_split_document_recursive
        if use_recursive_splitter
        else load_and_split_document_basic
    )

    chunk_count = 0
    for uploaded in files:
        chunks = split_document(uploaded)
        create_or_update_database(chunks, embeddings)
        chunk_count += len(chunks)

    return f"Vector store updated successfully. Processed {chunk_count} chunks from {len(files)} files."
|
310 |
|
311 |
def extract_db_to_excel():
|
312 |
embed = get_embeddings()
|
|
|
337 |
|
338 |
return excel_path
|
339 |
|
340 |
+
# Gradio interface
|
341 |
with gr.Blocks() as demo:
    gr.Markdown("# Chat with your PDF documents")

    # --- Document upload / vector store maintenance ---
    with gr.Row():
        file_input = gr.Files(label="Upload your PDF documents", file_types=[".pdf"])
        update_button = gr.Button("Update Vector Store")
        use_recursive_splitter = gr.Checkbox(label="Use Recursive Text Splitter", value=False)

    update_output = gr.Textbox(label="Update Status")
    update_button.click(update_vectors, inputs=[file_input, use_recursive_splitter], outputs=update_output)

    # --- Chat area (left) and generation controls (right) ---
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(label="Conversation")
            question_input = gr.Textbox(label="Ask a question about your documents")
            submit_button = gr.Button("Submit")
        with gr.Column(scale=1):
            temperature_slider = gr.Slider(label="Temperature", minimum=0.0, maximum=1.0, value=0.5, step=0.1)
            top_p_slider = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, value=0.9, step=0.1)
            repetition_penalty_slider = gr.Slider(label="Repetition Penalty", minimum=1.0, maximum=2.0, value=1.0, step=0.1)
            web_search_checkbox = gr.Checkbox(label="Enable Web Search", value=False)

    def chat(question, history, temperature, top_p, repetition_penalty, web_search):
        """Answer ``question`` and append the exchange to the chat history.

        The generation settings arrive as event inputs so they reflect what the
        user currently has selected; reading ``component.value`` would only
        return the value each component was constructed with.

        Returns:
            An empty string (clears the question box) and the updated history.
        """
        history = history or []  # tolerate an unset chatbot value
        answer = ask_question(question, temperature, top_p, repetition_penalty, web_search)
        history.append((question, answer))
        return "", history

    submit_button.click(
        chat,
        inputs=[
            question_input,
            chatbot,
            temperature_slider,
            top_p_slider,
            repetition_penalty_slider,
            web_search_checkbox,
        ],
        outputs=[question_input, chatbot],
    )

    # --- Export and maintenance actions ---
    extract_button = gr.Button("Extract Database to Excel")
    excel_output = gr.File(label="Download Excel File")
    extract_button.click(extract_db_to_excel, inputs=[], outputs=excel_output)

    export_memory_button = gr.Button("Export Memory Database to Excel")
    memory_excel_output = gr.File(label="Download Memory Excel File")
    export_memory_button.click(export_memory_db_to_excel, inputs=[], outputs=memory_excel_output)

    clear_button = gr.Button("Clear Cache")
    clear_output = gr.Textbox(label="Cache Status")
    clear_button.click(clear_cache, inputs=[], outputs=clear_output)

if __name__ == "__main__":
    demo.launch()
|