Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ from nltk.tokenize import sent_tokenize
|
|
10 |
import torch
|
11 |
import pickle
|
12 |
import nltk
|
|
|
13 |
|
14 |
# Ensure NLTK resources are downloaded
|
15 |
try:
|
@@ -43,7 +44,11 @@ if os.path.exists(index_path):
|
|
43 |
faiss_index = pickle.load(f)
|
44 |
print("Loaded FAISS index from faiss_index.pkl")
|
45 |
else:
|
46 |
-
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def extract_text_from_pdf(pdf_path):
|
49 |
text = ""
|
@@ -86,15 +91,15 @@ def upload_files(files):
|
|
86 |
sentences = preprocess_text(text)
|
87 |
embeddings = embedding_model.encode(sentences)
|
88 |
|
89 |
-
for
|
90 |
-
faiss_index.
|
91 |
|
92 |
except Exception as e:
|
93 |
print(f"Error processing file '{file.name}': {e}")
|
94 |
return {"error": str(e)}
|
95 |
|
96 |
-
|
97 |
-
|
98 |
|
99 |
return {"message": "Files processed successfully"}
|
100 |
|
|
|
10 |
import torch
|
11 |
import pickle
|
12 |
import nltk
|
13 |
+
import faiss
|
14 |
|
15 |
# Ensure NLTK resources are downloaded
|
16 |
try:
|
|
|
44 |
faiss_index = pickle.load(f)
|
45 |
print("Loaded FAISS index from faiss_index.pkl")
|
46 |
else:
|
47 |
+
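# Create a new FAISS index (IVF type: it must be trained via .train() on sample vectors before .add() will accept any embeddings)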
|
48 |
+
d = embedding_model.get_sentence_embedding_dimension() # Dimension of the embeddings
|
49 |
+
nlist = 100 # Number of clusters (for IVF)
|
50 |
+
quantizer = faiss.IndexFlatL2(d) # This is the quantizer for IVF
|
51 |
+
faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
|
52 |
|
53 |
def extract_text_from_pdf(pdf_path):
|
54 |
text = ""
|
|
|
91 |
sentences = preprocess_text(text)
|
92 |
embeddings = embedding_model.encode(sentences)
|
93 |
|
94 |
+
for embedding in embeddings:
|
95 |
+
faiss_index.add(np.array([embedding])) # Add each embedding individually
|
96 |
|
97 |
except Exception as e:
|
98 |
print(f"Error processing file '{file.name}': {e}")
|
99 |
return {"error": str(e)}
|
100 |
|
101 |
+
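# Save the updated index (NOTE: faiss.write_index output is not a pickle, while the load path above uses pickle.load; confirm both sides use the same format)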
|
102 |
+
faiss.write_index(faiss_index, index_path)
|
103 |
|
104 |
return {"message": "Files processed successfully"}
|
105 |
|