Shreyas094 committed on
Commit
d513b0d
·
verified ·
1 Parent(s): f48c882

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -48
app.py CHANGED
@@ -66,37 +66,22 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
66
  def get_embeddings():
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
69
- # File to store the list of uploaded documents
70
- # File to store metadata about uploaded documents
71
- METADATA_FILE = "document_metadata.json"
72
-
73
- def load_document_metadata():
74
- if os.path.exists(METADATA_FILE):
75
- with open(METADATA_FILE, 'r') as f:
76
- return json.load(f)
77
- return []
78
-
79
- def save_document_metadata(metadata):
80
- with open(METADATA_FILE, 'w') as f:
81
- json.dump(metadata, f)
82
-
83
  def update_vectors(files, parser):
 
84
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
85
 
86
  if not files:
87
  logging.warning("No files provided for update_vectors")
88
  return "Please upload at least one PDF file.", gr.CheckboxGroup(
89
- choices=[],
90
- value=[],
91
  label="Select documents to query"
92
  )
93
 
94
- embed = HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
95
  total_chunks = 0
96
 
97
  all_data = []
98
- metadata = load_document_metadata()
99
-
100
  for file in files:
101
  logging.info(f"Processing file: {file.name}")
102
  try:
@@ -104,13 +89,12 @@ def update_vectors(files, parser):
104
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
105
  all_data.extend(data)
106
  total_chunks += len(data)
107
-
108
- # Update metadata
109
- if not any(doc["name"] == file.name for doc in metadata):
110
- metadata.append({"name": file.name, "selected": True})
111
- logging.info(f"Added new document to metadata: {file.name}")
112
  else:
113
- logging.info(f"Document already exists in metadata: {file.name}")
114
  except Exception as e:
115
  logging.error(f"Error processing file {file.name}: {str(e)}")
116
 
@@ -127,25 +111,9 @@ def update_vectors(files, parser):
127
  database.save_local("faiss_database")
128
  logging.info("FAISS database saved")
129
 
130
- # Save the updated metadata
131
- save_document_metadata(metadata)
132
- logging.info("Document metadata saved")
133
-
134
- completion_message = f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
135
- logging.info(completion_message)
136
-
137
- updated_choices = [doc["name"] for doc in metadata]
138
- updated_values = [doc["name"] for doc in metadata if doc["selected"]]
139
-
140
- return completion_message, gr.CheckboxGroup.update(choices=updated_choices, value=updated_values, label="Select documents to query")
141
-
142
- # Make sure to call this function at the start of your script
143
-
144
- def initialize_document_selector():
145
- metadata = load_document_metadata()
146
- return gr.CheckboxGroup(
147
- choices=[doc["name"] for doc in metadata],
148
- value=[doc["name"] for doc in metadata if doc["selected"]],
149
  label="Select documents to query"
150
  )
151
 
@@ -520,8 +488,6 @@ def initial_conversation():
520
  # Define the checkbox outside the demo block
521
  document_selector = gr.CheckboxGroup(label="Select documents to query")
522
 
523
- document_selector = initialize_document_selector()
524
-
525
  use_web_search = gr.Checkbox(label="Use Web Search", value=True)
526
 
527
  custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
@@ -583,7 +549,6 @@ with demo:
583
  update_button = gr.Button("Upload Document")
584
 
585
  update_output = gr.Textbox(label="Update Status")
586
- document_selector = gr.CheckboxGroup(label="Select documents to query")
587
 
588
  # Update both the output text and the document selector
589
  update_button.click(update_vectors,
@@ -604,4 +569,4 @@ with demo:
604
  )
605
 
606
  if __name__ == "__main__":
607
- demo.launch(share=True)
 
66
  def get_embeddings():
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  def update_vectors(files, parser):
70
+ global uploaded_documents
71
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
72
 
73
  if not files:
74
  logging.warning("No files provided for update_vectors")
75
  return "Please upload at least one PDF file.", gr.CheckboxGroup(
76
+ choices=[doc["name"] for doc in uploaded_documents],
77
+ value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
78
  label="Select documents to query"
79
  )
80
 
81
+ embed = get_embeddings()
82
  total_chunks = 0
83
 
84
  all_data = []
 
 
85
  for file in files:
86
  logging.info(f"Processing file: {file.name}")
87
  try:
 
89
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
90
  all_data.extend(data)
91
  total_chunks += len(data)
92
+ # Append new documents instead of replacing
93
+ if not any(doc["name"] == file.name for doc in uploaded_documents):
94
+ uploaded_documents.append({"name": file.name, "selected": True})
95
+ logging.info(f"Added new document to uploaded_documents: {file.name}")
 
96
  else:
97
+ logging.info(f"Document already exists in uploaded_documents: {file.name}")
98
  except Exception as e:
99
  logging.error(f"Error processing file {file.name}: {str(e)}")
100
 
 
111
  database.save_local("faiss_database")
112
  logging.info("FAISS database saved")
113
 
114
+ return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
115
+ choices=[doc["name"] for doc in uploaded_documents],
116
+ value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  label="Select documents to query"
118
  )
119
 
 
488
  # Define the checkbox outside the demo block
489
  document_selector = gr.CheckboxGroup(label="Select documents to query")
490
 
 
 
491
  use_web_search = gr.Checkbox(label="Use Web Search", value=True)
492
 
493
  custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
 
549
  update_button = gr.Button("Upload Document")
550
 
551
  update_output = gr.Textbox(label="Update Status")
 
552
 
553
  # Update both the output text and the document selector
554
  update_button.click(update_vectors,
 
569
  )
570
 
571
  if __name__ == "__main__":
572
+ demo.launch(share=True)