Shreyas094 committed on
Commit
4336f84
·
verified ·
1 Parent(s): b6325ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -24
app.py CHANGED
@@ -67,29 +67,27 @@ def get_embeddings():
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
69
  # File to store the list of uploaded documents
70
- DOCUMENTS_FILE = "uploaded_documents.json"
 
71
 
72
- def load_uploaded_documents():
73
- global uploaded_documents
74
- if os.path.exists(DOCUMENTS_FILE):
75
- with open(DOCUMENTS_FILE, 'r') as f:
76
- uploaded_documents = json.load(f)
77
- else:
78
- uploaded_documents = []
79
 
80
- def save_uploaded_documents():
81
- with open(DOCUMENTS_FILE, 'w') as f:
82
- json.dump(uploaded_documents, f)
83
 
84
  def update_vectors(files, parser):
85
- global uploaded_documents
86
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
87
 
88
  if not files:
89
  logging.warning("No files provided for update_vectors")
90
  return "Please upload at least one PDF file.", gr.CheckboxGroup(
91
- choices=[doc["name"] for doc in uploaded_documents],
92
- value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
93
  label="Select documents to query"
94
  )
95
 
@@ -97,6 +95,8 @@ def update_vectors(files, parser):
97
  total_chunks = 0
98
 
99
  all_data = []
 
 
100
  for file in files:
101
  logging.info(f"Processing file: {file.name}")
102
  try:
@@ -104,12 +104,13 @@ def update_vectors(files, parser):
104
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
105
  all_data.extend(data)
106
  total_chunks += len(data)
107
- # Append new documents instead of replacing
108
- if not any(doc["name"] == file.name for doc in uploaded_documents):
109
- uploaded_documents.append({"name": file.name, "selected": True})
110
- logging.info(f"Added new document to uploaded_documents: {file.name}")
 
111
  else:
112
- logging.info(f"Document already exists in uploaded_documents: {file.name}")
113
  except Exception as e:
114
  logging.error(f"Error processing file {file.name}: {str(e)}")
115
 
@@ -126,13 +127,13 @@ def update_vectors(files, parser):
126
  database.save_local("faiss_database")
127
  logging.info("FAISS database saved")
128
 
129
- # Save the updated list of documents
130
- save_uploaded_documents()
131
- logging.info("Uploaded documents list saved")
132
 
133
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
134
- choices=[doc["name"] for doc in uploaded_documents],
135
- value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
136
  label="Select documents to query"
137
  )
138
 
@@ -510,6 +511,8 @@ def initial_conversation():
510
  # Define the checkbox outside the demo block
511
  document_selector = gr.CheckboxGroup(label="Select documents to query")
512
 
 
 
513
  use_web_search = gr.Checkbox(label="Use Web Search", value=True)
514
 
515
  custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
@@ -571,6 +574,10 @@ with demo:
571
  update_button = gr.Button("Upload Document")
572
 
573
  update_output = gr.Textbox(label="Update Status")
 
 
 
 
574
 
575
  # Update both the output text and the document selector
576
  update_button.click(update_vectors,
 
67
  return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
68
 
69
  # File to store the list of uploaded documents
70
+ # File to store metadata about uploaded documents
71
+ METADATA_FILE = "document_metadata.json"
72
 
73
+ def load_document_metadata():
74
+ if os.path.exists(METADATA_FILE):
75
+ with open(METADATA_FILE, 'r') as f:
76
+ return json.load(f)
77
+ return []
 
 
78
 
79
+ def save_document_metadata(metadata):
80
+ with open(METADATA_FILE, 'w') as f:
81
+ json.dump(metadata, f)
82
 
83
  def update_vectors(files, parser):
 
84
  logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
85
 
86
  if not files:
87
  logging.warning("No files provided for update_vectors")
88
  return "Please upload at least one PDF file.", gr.CheckboxGroup(
89
+ choices=[],
90
+ value=[],
91
  label="Select documents to query"
92
  )
93
 
 
95
  total_chunks = 0
96
 
97
  all_data = []
98
+ metadata = load_document_metadata()
99
+
100
  for file in files:
101
  logging.info(f"Processing file: {file.name}")
102
  try:
 
104
  logging.info(f"Loaded {len(data)} chunks from {file.name}")
105
  all_data.extend(data)
106
  total_chunks += len(data)
107
+
108
+ # Update metadata
109
+ if not any(doc["name"] == file.name for doc in metadata):
110
+ metadata.append({"name": file.name, "selected": True})
111
+ logging.info(f"Added new document to metadata: {file.name}")
112
  else:
113
+ logging.info(f"Document already exists in metadata: {file.name}")
114
  except Exception as e:
115
  logging.error(f"Error processing file {file.name}: {str(e)}")
116
 
 
127
  database.save_local("faiss_database")
128
  logging.info("FAISS database saved")
129
 
130
+ # Save the updated metadata
131
+ save_document_metadata(metadata)
132
+ logging.info("Document metadata saved")
133
 
134
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
135
+ choices=[doc["name"] for doc in metadata],
136
+ value=[doc["name"] for doc in metadata if doc["selected"]],
137
  label="Select documents to query"
138
  )
139
 
 
511
  # Define the checkbox outside the demo block
512
  document_selector = gr.CheckboxGroup(label="Select documents to query")
513
 
514
+ document_selector = initialize_document_selector()
515
+
516
  use_web_search = gr.Checkbox(label="Use Web Search", value=True)
517
 
518
  custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
 
574
  update_button = gr.Button("Upload Document")
575
 
576
  update_output = gr.Textbox(label="Update Status")
577
+ document_selector = gr.CheckboxGroup(label="Select documents to query")
578
+
579
+ # Initialize document selector
580
+ document_selector = initialize_document_selector()
581
 
582
  # Update both the output text and the document selector
583
  update_button.click(update_vectors,