Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -66,37 +66,22 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
|
|
66 |
def get_embeddings():
|
67 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
|
68 |
|
69 |
-
# File to store the list of uploaded documents
|
70 |
-
# File to store metadata about uploaded documents
|
71 |
-
METADATA_FILE = "document_metadata.json"
|
72 |
-
|
73 |
-
def load_document_metadata():
|
74 |
-
if os.path.exists(METADATA_FILE):
|
75 |
-
with open(METADATA_FILE, 'r') as f:
|
76 |
-
return json.load(f)
|
77 |
-
return []
|
78 |
-
|
79 |
-
def save_document_metadata(metadata):
|
80 |
-
with open(METADATA_FILE, 'w') as f:
|
81 |
-
json.dump(metadata, f)
|
82 |
-
|
83 |
def update_vectors(files, parser):
|
|
|
84 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
85 |
|
86 |
if not files:
|
87 |
logging.warning("No files provided for update_vectors")
|
88 |
return "Please upload at least one PDF file.", gr.CheckboxGroup(
|
89 |
-
choices=[],
|
90 |
-
value=[],
|
91 |
label="Select documents to query"
|
92 |
)
|
93 |
|
94 |
-
embed =
|
95 |
total_chunks = 0
|
96 |
|
97 |
all_data = []
|
98 |
-
metadata = load_document_metadata()
|
99 |
-
|
100 |
for file in files:
|
101 |
logging.info(f"Processing file: {file.name}")
|
102 |
try:
|
@@ -104,13 +89,12 @@ def update_vectors(files, parser):
|
|
104 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
105 |
all_data.extend(data)
|
106 |
total_chunks += len(data)
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
logging.info(f"Added new document to metadata: {file.name}")
|
112 |
else:
|
113 |
-
logging.info(f"Document already exists in metadata: {file.name}")
|
114 |
except Exception as e:
|
115 |
logging.error(f"Error processing file {file.name}: {str(e)}")
|
116 |
|
@@ -127,25 +111,9 @@ def update_vectors(files, parser):
|
|
127 |
database.save_local("faiss_database")
|
128 |
logging.info("FAISS database saved")
|
129 |
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
completion_message = f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}."
|
135 |
-
logging.info(completion_message)
|
136 |
-
|
137 |
-
updated_choices = [doc["name"] for doc in metadata]
|
138 |
-
updated_values = [doc["name"] for doc in metadata if doc["selected"]]
|
139 |
-
|
140 |
-
return completion_message, gr.CheckboxGroup.update(choices=updated_choices, value=updated_values, label="Select documents to query")
|
141 |
-
|
142 |
-
# Make sure to call this function at the start of your script
|
143 |
-
|
144 |
-
def initialize_document_selector():
|
145 |
-
metadata = load_document_metadata()
|
146 |
-
return gr.CheckboxGroup(
|
147 |
-
choices=[doc["name"] for doc in metadata],
|
148 |
-
value=[doc["name"] for doc in metadata if doc["selected"]],
|
149 |
label="Select documents to query"
|
150 |
)
|
151 |
|
@@ -520,8 +488,6 @@ def initial_conversation():
|
|
520 |
# Define the checkbox outside the demo block
|
521 |
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
522 |
|
523 |
-
document_selector = initialize_document_selector()
|
524 |
-
|
525 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
526 |
|
527 |
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
|
@@ -583,7 +549,6 @@ with demo:
|
|
583 |
update_button = gr.Button("Upload Document")
|
584 |
|
585 |
update_output = gr.Textbox(label="Update Status")
|
586 |
-
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
587 |
|
588 |
# Update both the output text and the document selector
|
589 |
update_button.click(update_vectors,
|
@@ -604,4 +569,4 @@ with demo:
|
|
604 |
)
|
605 |
|
606 |
if __name__ == "__main__":
|
607 |
-
demo.launch(share=True)
|
|
|
66 |
def get_embeddings():
|
67 |
return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
def update_vectors(files, parser):
|
70 |
+
global uploaded_documents
|
71 |
logging.info(f"Entering update_vectors with {len(files)} files and parser: {parser}")
|
72 |
|
73 |
if not files:
|
74 |
logging.warning("No files provided for update_vectors")
|
75 |
return "Please upload at least one PDF file.", gr.CheckboxGroup(
|
76 |
+
choices=[doc["name"] for doc in uploaded_documents],
|
77 |
+
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
78 |
label="Select documents to query"
|
79 |
)
|
80 |
|
81 |
+
embed = get_embeddings()
|
82 |
total_chunks = 0
|
83 |
|
84 |
all_data = []
|
|
|
|
|
85 |
for file in files:
|
86 |
logging.info(f"Processing file: {file.name}")
|
87 |
try:
|
|
|
89 |
logging.info(f"Loaded {len(data)} chunks from {file.name}")
|
90 |
all_data.extend(data)
|
91 |
total_chunks += len(data)
|
92 |
+
# Append new documents instead of replacing
|
93 |
+
if not any(doc["name"] == file.name for doc in uploaded_documents):
|
94 |
+
uploaded_documents.append({"name": file.name, "selected": True})
|
95 |
+
logging.info(f"Added new document to uploaded_documents: {file.name}")
|
|
|
96 |
else:
|
97 |
+
logging.info(f"Document already exists in uploaded_documents: {file.name}")
|
98 |
except Exception as e:
|
99 |
logging.error(f"Error processing file {file.name}: {str(e)}")
|
100 |
|
|
|
111 |
database.save_local("faiss_database")
|
112 |
logging.info("FAISS database saved")
|
113 |
|
114 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", gr.CheckboxGroup(
|
115 |
+
choices=[doc["name"] for doc in uploaded_documents],
|
116 |
+
value=[doc["name"] for doc in uploaded_documents if doc["selected"]],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
label="Select documents to query"
|
118 |
)
|
119 |
|
|
|
488 |
# Define the checkbox outside the demo block
|
489 |
document_selector = gr.CheckboxGroup(label="Select documents to query")
|
490 |
|
|
|
|
|
491 |
use_web_search = gr.Checkbox(label="Use Web Search", value=True)
|
492 |
|
493 |
custom_placeholder = "Ask a question (Note: You can toggle between Web Search and PDF Chat in Additional Inputs below)"
|
|
|
549 |
update_button = gr.Button("Upload Document")
|
550 |
|
551 |
update_output = gr.Textbox(label="Update Status")
|
|
|
552 |
|
553 |
# Update both the output text and the document selector
|
554 |
update_button.click(update_vectors,
|
|
|
569 |
)
|
570 |
|
571 |
if __name__ == "__main__":
|
572 |
+
demo.launch(share=True)
|