Shreyas094 committed
Commit: c89450b
Parent(s): d52f389

Update app.py

Files changed (1)
  1. app.py +67 -10
app.py CHANGED
@@ -15,6 +15,7 @@ from langchain_core.documents import Document
 from huggingface_hub import InferenceClient
 import inspect
 import logging
+import shutil
 
 
 # Set up basic configuration for logging
@@ -99,6 +100,9 @@ def update_vectors(files, parser):
         logging.info(f"Processing file: {file.name}")
         try:
             data = load_document(file, parser)
+            if not data:
+                logging.warning(f"No chunks loaded from {file.name}")
+                continue
             logging.info(f"Loaded {len(data)} chunks from {file.name}")
             all_data.extend(data)
             total_chunks += len(data)
@@ -112,22 +116,69 @@ def update_vectors(files, parser):
 
     logging.info(f"Total chunks processed: {total_chunks}")
 
-    if os.path.exists("faiss_database"):
-        logging.info("Updating existing FAISS database")
-        database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
-        database.add_documents(all_data)
-    else:
-        logging.info("Creating new FAISS database")
-        database = FAISS.from_documents(all_data, embed)
+    if not all_data:
+        logging.warning("No valid data extracted from uploaded files")
+        return "No valid data could be extracted from the uploaded files. Please check the file contents and try again.", display_documents()
 
-    database.save_local("faiss_database")
-    logging.info("FAISS database saved")
+    try:
+        if os.path.exists("faiss_database"):
+            logging.info("Updating existing FAISS database")
+            database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+            database.add_documents(all_data)
+        else:
+            logging.info("Creating new FAISS database")
+            database = FAISS.from_documents(all_data, embed)
+
+        database.save_local("faiss_database")
+        logging.info("FAISS database saved")
+    except Exception as e:
+        logging.error(f"Error updating FAISS database: {str(e)}")
+        return f"Error updating vector store: {str(e)}", display_documents()
 
     # Save the updated list of documents
     save_documents(uploaded_documents)
 
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
 
+def delete_documents(selected_docs):
+    global uploaded_documents
+
+    if not selected_docs:
+        return "No documents selected for deletion.", display_documents()
+
+    embed = get_embeddings()
+    database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+
+    deleted_docs = []
+    docs_to_keep = []
+    for doc in database.docstore._dict.values():
+        if doc.metadata.get("source") not in selected_docs:
+            docs_to_keep.append(doc)
+        else:
+            deleted_docs.append(doc.metadata.get("source", "Unknown"))
+
+    # Print debugging information
+    logging.info(f"Total documents before deletion: {len(database.docstore._dict)}")
+    logging.info(f"Documents to keep: {len(docs_to_keep)}")
+    logging.info(f"Documents to delete: {len(deleted_docs)}")
+
+    if not docs_to_keep:
+        # If all documents are deleted, remove the FAISS database directory
+        if os.path.exists("faiss_database"):
+            shutil.rmtree("faiss_database")
+            logging.info("All documents deleted. Removed FAISS database directory.")
+    else:
+        # Create new FAISS index with remaining documents
+        new_database = FAISS.from_documents(docs_to_keep, embed)
+        new_database.save_local("faiss_database")
+        logging.info(f"Created new FAISS index with {len(docs_to_keep)} documents.")
+
+    # Update uploaded_documents list
+    uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in deleted_docs]
+    save_documents(uploaded_documents)
+
+    return f"Deleted documents: {', '.join(deleted_docs)}", display_documents()
+
 def generate_chunked_response(prompt, model, max_tokens=10000, num_calls=3, temperature=0.2, should_stop=False):
     print(f"Starting generate_chunked_response with {num_calls} calls")
     full_response = ""
@@ -536,7 +587,7 @@ def display_documents():
     return gr.CheckboxGroup(
         choices=[doc["name"] for doc in uploaded_documents],
         value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
-        label="Select documents to query"
+        label="Select documents to query or delete"
     )
 
 # Add this new function
@@ -623,6 +674,7 @@ with demo:
     refresh_button = gr.Button("Refresh Document List")
 
     update_output = gr.Textbox(label="Update Status")
+    delete_button = gr.Button("Delete Selected Documents")
 
     # Update both the output text and the document selector
    update_button.click(update_vectors,
@@ -633,6 +685,11 @@ with demo:
     refresh_button.click(refresh_documents,
                          inputs=[],
                          outputs=[document_selector])
+
+    # Add the delete button functionality
+    delete_button.click(delete_documents,
+                        inputs=[document_selector],
+                        outputs=[update_output, document_selector])
 
     gr.Markdown(
     """