Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,6 +11,7 @@ import torch
|
|
11 |
import pickle
|
12 |
import nltk
|
13 |
import faiss
|
|
|
14 |
|
15 |
# Ensure NLTK resources are downloaded
|
16 |
try:
|
@@ -41,7 +42,7 @@ hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-Mini
|
|
41 |
index_path = "faiss_index.pkl"
|
42 |
if os.path.exists(index_path):
|
43 |
with open(index_path, "rb") as f:
|
44 |
-
faiss_index =
|
45 |
print("Loaded FAISS index from faiss_index.pkl")
|
46 |
else:
|
47 |
# Create a new FAISS index
|
@@ -49,6 +50,7 @@ else:
|
|
49 |
nlist = 100 # Number of clusters (for IVF)
|
50 |
quantizer = faiss.IndexFlatL2(d) # This is the quantizer for IVF
|
51 |
faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
|
|
|
52 |
|
53 |
def extract_text_from_pdf(pdf_path):
|
54 |
text = ""
|
@@ -92,7 +94,7 @@ def upload_files(files):
|
|
92 |
embeddings = embedding_model.encode(sentences)
|
93 |
|
94 |
for embedding in embeddings:
|
95 |
-
faiss_index.add(np.array([embedding])) # Add each embedding individually
|
96 |
|
97 |
except Exception as e:
|
98 |
print(f"Error processing file '{file.name}': {e}")
|
@@ -137,6 +139,3 @@ with gr.Blocks() as demo:
|
|
137 |
query_button.click(fn=process_and_query, inputs=[query], outputs=query_output)
|
138 |
|
139 |
demo.launch()
|
140 |
-
|
141 |
-
|
142 |
-
|
|
|
11 |
import pickle
|
12 |
import nltk
|
13 |
import faiss
|
14 |
+
import numpy as np
|
15 |
|
16 |
# Ensure NLTK resources are downloaded
|
17 |
try:
|
|
|
42 |
index_path = "faiss_index.pkl"
|
43 |
if os.path.exists(index_path):
|
44 |
with open(index_path, "rb") as f:
|
45 |
+
faiss_index = faiss.read_index(f)
|
46 |
print("Loaded FAISS index from faiss_index.pkl")
|
47 |
else:
|
48 |
# Create a new FAISS index
|
|
|
50 |
nlist = 100 # Number of clusters (for IVF)
|
51 |
quantizer = faiss.IndexFlatL2(d) # This is the quantizer for IVF
|
52 |
faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
|
53 |
+
faiss_index.train(np.zeros((nlist, d)).astype(np.float32)) # Train the index with dummy data
|
54 |
|
55 |
def extract_text_from_pdf(pdf_path):
|
56 |
text = ""
|
|
|
94 |
embeddings = embedding_model.encode(sentences)
|
95 |
|
96 |
for embedding in embeddings:
|
97 |
+
faiss_index.add(np.array([embedding]).astype(np.float32)) # Add each embedding individually
|
98 |
|
99 |
except Exception as e:
|
100 |
print(f"Error processing file '{file.name}': {e}")
|
|
|
139 |
query_button.click(fn=process_and_query, inputs=[query], outputs=query_output)
|
140 |
|
141 |
demo.launch()
|
|
|
|
|
|