Shreyas094 committed on
Commit
57b395c
·
verified ·
1 Parent(s): 754d288

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -55
app.py CHANGED
@@ -66,29 +66,6 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
66
  def get_embeddings():
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
69
- def scan_faiss_database():
70
- global uploaded_documents
71
- uploaded_documents = []
72
-
73
- if os.path.exists("faiss_database"):
74
- embed = get_embeddings()
75
- database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
76
-
77
- for doc in database.docstore._dict.values():
78
- file_name = os.path.basename(doc.metadata['source'])
79
- if not any(d['name'] == file_name for d in uploaded_documents):
80
- uploaded_documents.append({"name": file_name, "selected": True})
81
-
82
- return uploaded_documents
83
-
84
- # Call this function when the application starts
85
- uploaded_documents = scan_faiss_database()
86
-
87
- def initialize_session():
88
- global uploaded_documents
89
- uploaded_documents = scan_faiss_database()
90
- return uploaded_documents
91
-
92
  def update_vectors(files, parser):
93
  global uploaded_documents
94
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
@@ -112,12 +89,12 @@ def update_vectors(files, parser):
112
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
113
  all_data.extend(data)
114
  total_chunks += len(data)
115
- file_name = os.path.basename(file.name)
116
- if not any(doc["name"] == file_name for doc in uploaded_documents):
117
- uploaded_documents.append({"name": file_name, "selected": True})
118
- logging.info(f"Added new document to uploaded_documents: {file_name}")
119
  else:
120
- logging.info(f"Document already exists in uploaded_documents: {file_name}")
121
  except Exception as e:
122
  logging.error(f"Error processing file {file.name}: {str(e)}")
123
 
@@ -133,6 +110,7 @@ def update_vectors(files, parser):
133
 
134
  database.save_local("faiss_database")
135
  logging.info("FAISS database saved")
 
136
 
137
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
138
  choices=[doc["name"] for doc in uploaded_documents],
@@ -240,14 +218,14 @@ class CitingSources(BaseModel):
240
  ...,
241
  description="List of sources to cite. Should be an URL of the source."
242
  )
243
- def chatbot_interface(message, history, use_web_search, model, temperature, num_calls, selected_docs):
244
  if not message.strip():
245
  return "", history
246
 
247
  history = history + [(message, "")]
248
 
249
  try:
250
- for response in respond(message, history, model, temperature, num_calls, use_web_search, selected_docs):
251
  history[-1] = (message, response)
252
  yield history
253
  except gr.CancelledError:
@@ -270,12 +248,15 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
270
  logging.info(f"User Query: {message}")
271
  logging.info(f"Model Used: {model}")
272
  logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
 
273
  logging.info(f"Selected Documents: {selected_docs}")
274
 
275
  try:
276
  if use_web_search:
277
  for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
278
  response = f"{main_content}\n\n{sources}"
 
 
279
  yield response
280
  else:
281
  embed = get_embeddings()
@@ -283,8 +264,9 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
283
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
284
  retriever = database.as_retriever()
285
 
 
286
  all_relevant_docs = retriever.get_relevant_documents(message)
287
- relevant_docs = [doc for doc in all_relevant_docs if os.path.basename(doc.metadata["source"]) in selected_docs]
288
 
289
  if not relevant_docs:
290
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
@@ -299,10 +281,14 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
299
  if model == "@cf/meta/llama-3.1-8b-instruct":
300
  # Use Cloudflare API
301
  for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
 
 
302
  yield partial_response
303
  else:
304
  # Use Hugging Face API
305
  for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
 
 
306
  yield partial_response
307
  except Exception as e:
308
  logging.error(f"Error with {model}: {str(e)}")
@@ -555,9 +541,7 @@ demo = gr.ChatInterface(
555
  )
556
 
557
  # Add file upload functionality
558
- with gr.Blocks() as demo:
559
- session_documents = gr.State(initialize_session)
560
-
561
  gr.Markdown("## Upload PDF Documents")
562
 
563
  with gr.Row():
@@ -566,29 +550,11 @@ with gr.Blocks() as demo:
566
  update_button = gr.Button("Upload Document")
567
 
568
  update_output = gr.Textbox(label="Update Status")
569
- document_selector = gr.CheckboxGroup(
570
- choices=[doc["name"] for doc in uploaded_documents],
571
- value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
572
- label="Select documents to query"
573
- )
574
 
575
  # Update both the output text and the document selector
576
- update_button.click(
577
- update_vectors,
578
- inputs=[file_input, parser_dropdown],
579
- outputs=[update_output, document_selector]
580
- )
581
-
582
- # Add a refresh button to update the document selector
583
- refresh_button = gr.Button("Refresh Document List")
584
- refresh_button.click(
585
- lambda: gr.CheckboxGroup(
586
- choices=[doc["name"] for doc in uploaded_documents],
587
- value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
588
- label="Select documents to query"
589
- ),
590
- outputs=[document_selector]
591
- )
592
 
593
  gr.Markdown(
594
  """
 
66
  def get_embeddings():
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def update_vectors(files, parser):
70
  global uploaded_documents
71
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
 
89
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
90
  all_data.extend(data)
91
  total_chunks += len(data)
92
+ # Append new documents instead of replacing
93
+ if not any(doc["name"] == file.name for doc in uploaded_documents):
94
+ uploaded_documents.append({"name": file.name, "selected": True})
95
+ logging.info(f"Added new document to uploaded_documents: {file.name}")
96
  else:
97
+ logging.info(f"Document already exists in uploaded_documents: {file.name}")
98
  except Exception as e:
99
  logging.error(f"Error processing file {file.name}: {str(e)}")
100
 
 
110
 
111
  database.save_local("faiss_database")
112
  logging.info("FAISS database saved")
113
+
114
 
115
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
116
  choices=[doc["name"] for doc in uploaded_documents],
 
218
  ...,
219
  description="List of sources to cite. Should be an URL of the source."
220
  )
221
+ def chatbot_interface(message, history, use_web_search, model, temperature, num_calls):
222
  if not message.strip():
223
  return "", history
224
 
225
  history = history + [(message, "")]
226
 
227
  try:
228
+ for response in respond(message, history, model, temperature, num_calls, use_web_search):
229
  history[-1] = (message, response)
230
  yield history
231
  except gr.CancelledError:
 
248
  logging.info(f"User Query: {message}")
249
  logging.info(f"Model Used: {model}")
250
  logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
251
+
252
  logging.info(f"Selected Documents: {selected_docs}")
253
 
254
  try:
255
  if use_web_search:
256
  for main_content, sources in get_response_with_search(message, model, num_calls=num_calls, temperature=temperature):
257
  response = f"{main_content}\n\n{sources}"
258
+ first_line = response.split('\n')[0] if response else ''
259
+ # logging.info(f"Generated Response (first line): {first_line}")
260
  yield response
261
  else:
262
  embed = get_embeddings()
 
264
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
265
  retriever = database.as_retriever()
266
 
267
+ # Filter relevant documents based on user selection
268
  all_relevant_docs = retriever.get_relevant_documents(message)
269
+ relevant_docs = [doc for doc in all_relevant_docs if doc.metadata["source"] in selected_docs]
270
 
271
  if not relevant_docs:
272
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
 
281
  if model == "@cf/meta/llama-3.1-8b-instruct":
282
  # Use Cloudflare API
283
  for partial_response in get_response_from_cloudflare(prompt="", context=context_str, query=message, num_calls=num_calls, temperature=temperature, search_type="pdf"):
284
+ first_line = partial_response.split('\n')[0] if partial_response else ''
285
+ logging.info(f"Generated Response (first line): {first_line}")
286
  yield partial_response
287
  else:
288
  # Use Hugging Face API
289
  for partial_response in get_response_from_pdf(message, model, selected_docs, num_calls=num_calls, temperature=temperature):
290
+ first_line = partial_response.split('\n')[0] if partial_response else ''
291
+ logging.info(f"Generated Response (first line): {first_line}")
292
  yield partial_response
293
  except Exception as e:
294
  logging.error(f"Error with {model}: {str(e)}")
 
541
  )
542
 
543
  # Add file upload functionality
544
+ with demo:
 
 
545
  gr.Markdown("## Upload PDF Documents")
546
 
547
  with gr.Row():
 
550
  update_button = gr.Button("Upload Document")
551
 
552
  update_output = gr.Textbox(label="Update Status")
 
 
 
 
 
553
 
554
  # Update both the output text and the document selector
555
+ update_button.click(update_vectors,
556
+ inputs=[file_input, parser_dropdown],
557
+ outputs=[update_output, document_selector])
 
 
 
 
 
 
 
 
 
 
 
 
 
558
 
559
  gr.Markdown(
560
  """