Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -67,29 +67,27 @@ def get_embeddings():
|
|
67 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
|
68 |
|
69 |
# File to store the list of uploaded documents
|
70 |
-
|
|
|
71 |
|
72 |
-
def
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
else:
|
78 |
-
uploaded_documents = []
|
79 |
|
80 |
-
def
|
81 |
-
with open(
|
82 |
-
json.dump(
|
83 |
|
84 |
def update_vectors(files, parser):
|
85 |
-
global uploaded_documents
|
86 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
87 |
|
88 |
if not files:
|
89 |
logging.warning("No files provided for update_vectors")
|
90 |
return "Please upload at least one PDF file.", gr.CheckboxGroup(
|
91 |
-
choices=[
|
92 |
-
value=[
|
93 |
label="Select documents to query"
|
94 |
)
|
95 |
|
@@ -97,6 +95,8 @@ def update_vectors(files, parser):
|
|
97 |
total_chunks = 0
|
98 |
|
99 |
all_data = []
|
|
|
|
|
100 |
for file in files:
|
101 |
logging.info(f"Processing file: {file.name}")
|
102 |
try:
|
@@ -104,12 +104,13 @@ def update_vectors(files, parser):
|
|
104 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
105 |
all_data.extend(data)
|
106 |
total_chunks += len(data)
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
|
|
111 |
else:
|
112 |
-
logging.info(f"Document already exists in
|
113 |
except Exception as e:
|
114 |
logging.error(f"Error processing file {file.name}: {str(e)}")
|
115 |
|
@@ -126,13 +127,13 @@ def update_vectors(files, parser):
|
|
126 |
database.save_local("faiss_database")
|
127 |
logging.info("FAISS database saved")
|
128 |
|
129 |
-
# Save the updated
|
130 |
-
|
131 |
-
logging.info("
|
132 |
|
133 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
|
134 |
-
choices=[doc["name"] for doc in
|
135 |
-
value=[doc["name"] for doc in
|
136 |
label="Select documents to query"
|
137 |
)
|
138 |
|
@@ -510,6 +511,8 @@ def initial_conversation():
|
|
510 |
# Define the checkbox outside the demo block
|
511 |
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
512 |
|
|
|
|
|
513 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
514 |
|
515 |
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
|
@@ -571,6 +574,10 @@ with demo:
|
|
571 |
update_button = gr.Button("Upload Document")
|
572 |
|
573 |
update_output = gr.Textbox(label="Update Status")
|
|
|
|
|
|
|
|
|
574 |
|
575 |
# Update both the output text and the document selector
|
576 |
update_button.click(update_vectors,
|
|
|
67 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
|
68 |
|
69 |
# File to store the list of uploaded documents
|
70 |
+
# File to store metadata about uploaded documents
|
71 |
+
METADATA_FILE = "document_metadata.json"
|
72 |
|
73 |
+
def load_document_metadata():
    """Return the uploaded-document records stored in METADATA_FILE.

    The file is expected to hold a JSON list; update_vectors appends
    ``{"name": ..., "selected": ...}`` dicts to the returned list and
    iterates over it.

    Returns:
        The parsed list, or an empty list when the file does not exist,
        cannot be parsed as JSON, or does not contain a list.
    """
    # EAFP: open directly instead of checking existence first, so a file
    # removed between the check and the open cannot raise.
    try:
        with open(METADATA_FILE, "r", encoding="utf-8") as f:
            metadata = json.load(f)
    except FileNotFoundError:
        return []
    except (json.JSONDecodeError, UnicodeDecodeError) as e:
        # An empty or half-written file must not break uploads; fall back to
        # an empty list and leave a trace in the log.
        logging.warning(f"Could not parse {METADATA_FILE}; starting with empty metadata: {e}")
        return []

    # Callers append to and iterate over the result, so anything other than
    # a list is unusable.
    if not isinstance(metadata, list):
        logging.warning(f"Unexpected content in {METADATA_FILE}; starting with empty metadata")
        return []
    return metadata
|
|
|
|
|
78 |
|
79 |
+
def save_document_metadata(metadata):
    """Write *metadata* to METADATA_FILE as JSON.

    Args:
        metadata: JSON-serialisable object; update_vectors passes a list of
            ``{"name": ..., "selected": ...}`` dicts.

    Raises:
        TypeError: if *metadata* is not JSON-serialisable. The existing
            file is left untouched in that case.
    """
    # Serialise before touching the disk: opening the target in 'w' mode
    # first would truncate the previous metadata even if json then failed.
    payload = json.dumps(metadata)

    # Write to a sibling file and swap it in, so an interrupted write never
    # leaves a half-written METADATA_FILE behind.
    tmp_path = f"{METADATA_FILE}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        f.write(payload)
    os.replace(tmp_path, METADATA_FILE)
|
82 |
|
83 |
def update_vectors(files, parser):
|
|
|
84 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
85 |
|
86 |
if not files:
|
87 |
logging.warning("No files provided for update_vectors")
|
88 |
return "Please upload at least one PDF file.", gr.CheckboxGroup(
|
89 |
+
choices=[],
|
90 |
+
value=[],
|
91 |
label="Select documents to query"
|
92 |
)
|
93 |
|
|
|
95 |
total_chunks = 0
|
96 |
|
97 |
all_data = []
|
98 |
+
metadata = load_document_metadata()
|
99 |
+
|
100 |
for file in files:
|
101 |
logging.info(f"Processing file: {file.name}")
|
102 |
try:
|
|
|
104 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
105 |
all_data.extend(data)
|
106 |
total_chunks += len(data)
|
107 |
+
|
108 |
+
# Update metadata
|
109 |
+
if not any(doc["name"] == file.name for doc in metadata):
|
110 |
+
metadata.append({"name": file.name, "selected": True})
|
111 |
+
logging.info(f"Added new document to metadata: {file.name}")
|
112 |
else:
|
113 |
+
logging.info(f"Document already exists in metadata: {file.name}")
|
114 |
except Exception as e:
|
115 |
logging.error(f"Error processing file {file.name}: {str(e)}")
|
116 |
|
|
|
127 |
database.save_local("faiss_database")
|
128 |
logging.info("FAISS database saved")
|
129 |
|
130 |
+
# Save the updated metadata
|
131 |
+
save_document_metadata(metadata)
|
132 |
+
logging.info("Document metadata saved")
|
133 |
|
134 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
|
135 |
+
choices=[doc["name"] for doc in metadata],
|
136 |
+
value=[doc["name"] for doc in metadata if doc["selected"]],
|
137 |
label="Select documents to query"
|
138 |
)
|
139 |
|
|
|
511 |
# Define the checkbox outside the demo block
|
512 |
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
513 |
|
514 |
+
document_selector = initialize_document_selector()
|
515 |
+
|
516 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
517 |
|
518 |
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
|
|
|
574 |
update_button = gr.Button("Upload Document")
|
575 |
|
576 |
update_output = gr.Textbox(label="Update Status")
|
577 |
+
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
578 |
+
|
579 |
+
# Initialize document selector
|
580 |
+
document_selector = initialize_document_selector()
|
581 |
|
582 |
# Update both the output text and the document selector
|
583 |
update_button.click(update_vectors,
|