Shreyas094 committed on
Commit
754d288
·
verified ·
1 Parent(s): 8ee76cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -25
app.py CHANGED
@@ -71,10 +71,9 @@ def scan_faiss_database():
71
  uploaded_documents = []
72
 
73
  if os.path.exists("faiss_database"):
74
- embed = HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
75
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
76
 
77
- # Assuming each document in FAISS has a 'source' metadata field with the file name
78
  for doc in database.docstore._dict.values():
79
  file_name = os.path.basename(doc.metadata['source'])
80
  if not any(d['name'] == file_name for d in uploaded_documents):
@@ -85,6 +84,10 @@ def scan_faiss_database():
85
  # Call this function when the application starts
86
  uploaded_documents = scan_faiss_database()
87
 
 
 
 
 
88
 
89
  def update_vectors(files, parser):
90
  global uploaded_documents
@@ -109,12 +112,12 @@ def update_vectors(files, parser):
109
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
110
  all_data.extend(data)
111
  total_chunks += len(data)
112
- # Append new documents instead of replacing
113
- if not any(doc["name"] == file.name for doc in uploaded_documents):
114
- uploaded_documents.append({"name": file.name, "selected": True})
115
- logging.info(f"Added new document to uploaded_documents: {file.name}")
116
  else:
117
- logging.info(f"Document already exists in uploaded_documents: {file.name}")
118
  except Exception as e:
119
  logging.error(f"Error processing file {file.name}: {str(e)}")
120
 
@@ -131,9 +134,6 @@ def update_vectors(files, parser):
131
  database.save_local("faiss_database")
132
  logging.info("FAISS database saved")
133
 
134
- # After processing new files, rescan the database
135
- uploaded_documents = scan_faiss_database()
136
-
137
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
138
  choices=[doc["name"] for doc in uploaded_documents],
139
  value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
@@ -240,14 +240,14 @@ class CitingSources(BaseModel):
240
  ...,
241
  description="List of sources to cite. Should be an URL of the source."
242
  )
243
- def chatbot_interface(message, history, use_web_search, model, temperature, num_calls):
244
  if not message.strip():
245
  return "", history
246
 
247
  history = history + [(message, "")]
248
 
249
  try:
250
- for response in respond(message, history, model, temperature, num_calls, use_web_search):
251
  history[-1] = (message, response)
252
  yield history
253
  except gr.CancelledError:
@@ -270,15 +270,12 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
270
  logging.info(f"User Query: {message}")
271
  logging.info(f"Model Used: {model}")
272
  logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
273
-
274
  logging.info(f"Selected Documents: {selected_docs}")
275
 
276
  try:
277
  if use_web_search:
278
  for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
279
  response = f"{main_content}\n\n{sources}"
280
- first_line = response.split('\n')[0] if response else ''
281
- # logging.info(f"Generated Response (first line): {first_line}")
282
  yield response
283
  else:
284
  embed = get_embeddings()
@@ -286,9 +283,8 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
286
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
287
  retriever = database.as_retriever()
288
 
289
- # Filter relevant documents based on user selection
290
  all_relevant_docs = retriever.get_relevant_documents(message)
291
- relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
292
 
293
  if not relevant_docs:
294
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
@@ -303,14 +299,10 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
303
  if model == "@cf/meta/llama-3.1-8b-instruct":
304
  # Use Cloudflare API
305
  for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
306
- first_line = partial_response.split('\n')[0] if partial_response else ''
307
- logging.info(f"Generated Response (first line): {first_line}")
308
  yield partial_response
309
  else:
310
  # Use Hugging Face API
311
  for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
312
- first_line = partial_response.split('\n')[0] if partial_response else ''
313
- logging.info(f"Generated Response (first line): {first_line}")
314
  yield partial_response
315
  except Exception as e:
316
  logging.error(f"Error with {model}: {str(e)}")
@@ -563,7 +555,9 @@ demo = gr.ChatInterface(
563
  )
564
 
565
  # Add file upload functionality
566
- with demo:
 
 
567
  gr.Markdown("## Upload PDF Documents")
568
 
569
  with gr.Row():
@@ -572,11 +566,29 @@ with demo:
572
  update_button = gr.Button("Upload Document")
573
 
574
  update_output = gr.Textbox(label="Update Status")
 
 
 
 
 
575
 
576
  # Update both the output text and the document selector
577
- update_button.click(update_vectors,
578
- inputs=[file_input, parser_dropdown],
579
- outputs=[update_output, document_selector])
 
 
 
 
 
 
 
 
 
 
 
 
 
580
 
581
  gr.Markdown(
582
  """
 
71
  uploaded_documents = []
72
 
73
  if os.path.exists("faiss_database"):
74
+ embed = get_embeddings()
75
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
76
 
 
77
  for doc in database.docstore._dict.values():
78
  file_name = os.path.basename(doc.metadata['source'])
79
  if not any(d['name'] == file_name for d in uploaded_documents):
 
84
  # Call this function when the application starts
85
  uploaded_documents = scan_faiss_database()
86
 
87
def initialize_session():
    """Rescan the FAISS store at session start and publish the fresh document list.

    Rebinds the module-level ``uploaded_documents`` to the result of
    ``scan_faiss_database()`` and returns that same list, so it can be used
    directly as the initial value of a Gradio ``gr.State``.
    """
    global uploaded_documents
    docs = scan_faiss_database()
    uploaded_documents = docs
    return docs
 
92
  def update_vectors(files, parser):
93
  global uploaded_documents
 
112
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
113
  all_data.extend(data)
114
  total_chunks += len(data)
115
+ file_name = os.path.basename(file.name)
116
+ if not any(doc["name"] == file_name for doc in uploaded_documents):
117
+ uploaded_documents.append({"name": file_name, "selected": True})
118
+ logging.info(f"Added new document to uploaded_documents: {file_name}")
119
  else:
120
+ logging.info(f"Document already exists in uploaded_documents: {file_name}")
121
  except Exception as e:
122
  logging.error(f"Error processing file {file.name}: {str(e)}")
123
 
 
134
  database.save_local("faiss_database")
135
  logging.info("FAISS database saved")
136
 
 
 
 
137
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
138
  choices=[doc["name"] for doc in uploaded_documents],
139
  value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
 
240
  ...,
241
  description="List of sources to cite. Should be an URL of the source."
242
  )
243
+ def chatbot_interface(message, history, use_web_search, model, temperature, num_calls, selected_docs):
244
  if not message.strip():
245
  return "", history
246
 
247
  history = history + [(message, "")]
248
 
249
  try:
250
+ for response in respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
251
  history[-1] = (message, response)
252
  yield history
253
  except gr.CancelledError:
 
270
  logging.info(f"User Query: {message}")
271
  logging.info(f"Model Used: {model}")
272
  logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
 
273
  logging.info(f"Selected Documents: {selected_docs}")
274
 
275
  try:
276
  if use_web_search:
277
  for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
278
  response = f"{main_content}\n\n{sources}"
 
 
279
  yield response
280
  else:
281
  embed = get_embeddings()
 
283
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
284
  retriever = database.as_retriever()
285
 
 
286
  all_relevant_docs = retriever.get_relevant_documents(message)
287
+ relevant_docs = [doc for doc in all_relevant_docs if os.path.basename(doc.metadata["source"]) in selected_docs]
288
 
289
  if not relevant_docs:
290
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
 
299
  if model == "@cf/meta/llama-3.1-8b-instruct":
300
  # Use Cloudflare API
301
  for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
 
 
302
  yield partial_response
303
  else:
304
  # Use Hugging Face API
305
  for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
 
 
306
  yield partial_response
307
  except Exception as e:
308
  logging.error(f"Error with {model}: {str(e)}")
 
555
  )
556
 
557
  # Add file upload functionality
558
+ with gr.Blocks() as demo:
559
+ session_documents = gr.State(initialize_session)
560
+
561
  gr.Markdown("## Upload PDF Documents")
562
 
563
  with gr.Row():
 
566
  update_button = gr.Button("Upload Document")
567
 
568
  update_output = gr.Textbox(label="Update Status")
569
+ document_selector = gr.CheckboxGroup(
570
+ choices=[doc["name"] for doc in uploaded_documents],
571
+ value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
572
+ label="Select documents to query"
573
+ )
574
 
575
  # Update both the output text and the document selector
576
+ update_button.click(
577
+ update_vectors,
578
+ inputs=[file_input, parser_dropdown],
579
+ outputs=[update_output, document_selector]
580
+ )
581
+
582
+ # Add a refresh button to update the document selector
583
+ refresh_button = gr.Button("Refresh Document List")
584
+ refresh_button.click(
585
+ lambda: gr.CheckboxGroup(
586
+ choices=[doc["name"] for doc in uploaded_documents],
587
+ value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
588
+ label="Select documents to query"
589
+ ),
590
+ outputs=[document_selector]
591
+ )
592
 
593
  gr.Markdown(
594
  """