Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -76,7 +76,8 @@ else:
|
|
76 |
def upload_files(files):
|
77 |
global index, document_texts
|
78 |
try:
|
79 |
-
for
|
|
|
80 |
if file_path.endswith('.pdf'):
|
81 |
text = extract_text_from_pdf(file_path)
|
82 |
elif file_path.endswith('.docx'):
|
@@ -84,9 +85,12 @@ def upload_files(files):
|
|
84 |
else:
|
85 |
return "Unsupported file format"
|
86 |
|
|
|
|
|
87 |
# Process the text and update FAISS index
|
88 |
sentences = text.split("\n")
|
89 |
embeddings = embedding_model.encode(sentences)
|
|
|
90 |
index.add(np.array(embeddings))
|
91 |
document_texts.append(text)
|
92 |
|
@@ -105,11 +109,15 @@ def upload_files(files):
|
|
105 |
|
106 |
def query_text(text):
|
107 |
try:
|
|
|
|
|
108 |
# Encode the query text
|
109 |
query_embedding = embedding_model.encode([text])
|
|
|
110 |
|
111 |
# Search the FAISS index
|
112 |
D, I = index.search(np.array(query_embedding), k=5)
|
|
|
113 |
|
114 |
top_documents = []
|
115 |
for idx in I[0]:
|
@@ -148,6 +156,7 @@ demo.launch()
|
|
148 |
|
149 |
|
150 |
|
|
|
151 |
|
152 |
|
153 |
|
|
|
76 |
def upload_files(files):
|
77 |
global index, document_texts
|
78 |
try:
|
79 |
+
for file in files:
|
80 |
+
file_path = file.name # Get the file path from the NamedString object
|
81 |
if file_path.endswith('.pdf'):
|
82 |
text = extract_text_from_pdf(file_path)
|
83 |
elif file_path.endswith('.docx'):
|
|
|
85 |
else:
|
86 |
return "Unsupported file format"
|
87 |
|
88 |
+
print(f"Extracted text: {text[:100]}...") # Debug: Show the first 100 characters of the extracted text
|
89 |
+
|
90 |
# Process the text and update FAISS index
|
91 |
sentences = text.split("\n")
|
92 |
embeddings = embedding_model.encode(sentences)
|
93 |
+
print(f"Embeddings shape: {embeddings.shape}") # Debug: Show the shape of the embeddings
|
94 |
index.add(np.array(embeddings))
|
95 |
document_texts.append(text)
|
96 |
|
|
|
109 |
|
110 |
def query_text(text):
|
111 |
try:
|
112 |
+
print(f"Query text: {text}") # Debug: Show the query text
|
113 |
+
|
114 |
# Encode the query text
|
115 |
query_embedding = embedding_model.encode([text])
|
116 |
+
print(f"Query embedding shape: {query_embedding.shape}") # Debug: Show the shape of the query embedding
|
117 |
|
118 |
# Search the FAISS index
|
119 |
D, I = index.search(np.array(query_embedding), k=5)
|
120 |
+
print(f"Distances: {D}, Indices: {I}") # Debug: Show the distances and indices of the search results
|
121 |
|
122 |
top_documents = []
|
123 |
for idx in I[0]:
|
|
|
156 |
|
157 |
|
158 |
|
159 |
+
|
160 |
|
161 |
|
162 |
|