NaimaAqeel committed on
Commit
8c85ad8
·
verified ·
1 Parent(s): 124e62a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -5
app.py CHANGED
@@ -11,6 +11,7 @@ import torch
11
  import pickle
12
  import nltk
13
  import faiss
 
14
 
15
  # Ensure NLTK resources are downloaded
16
  try:
@@ -41,7 +42,7 @@ hf_embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-Mini
41
  index_path = "faiss_index.pkl"
42
  if os.path.exists(index_path):
43
  with open(index_path, "rb") as f:
44
- faiss_index = pickle.load(f)
45
  print("Loaded FAISS index from faiss_index.pkl")
46
  else:
47
  # Create a new FAISS index
@@ -49,6 +50,7 @@ else:
49
  nlist = 100 # Number of clusters (for IVF)
50
  quantizer = faiss.IndexFlatL2(d) # This is the quantizer for IVF
51
  faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
 
52
 
53
  def extract_text_from_pdf(pdf_path):
54
  text = ""
@@ -92,7 +94,7 @@ def upload_files(files):
92
  embeddings = embedding_model.encode(sentences)
93
 
94
  for embedding in embeddings:
95
- faiss_index.add(np.array([embedding])) # Add each embedding individually
96
 
97
  except Exception as e:
98
  print(f"Error processing file '{file.name}': {e}")
@@ -137,6 +139,3 @@ with gr.Blocks() as demo:
137
  query_button.click(fn=process_and_query, inputs=[query], outputs=query_output)
138
 
139
  demo.launch()
140
-
141
-
142
-
 
11
  import pickle
12
  import nltk
13
  import faiss
14
+ import numpy as np
15
 
16
  # Ensure NLTK resources are downloaded
17
  try:
 
42
  index_path = "faiss_index.pkl"
43
  if os.path.exists(index_path):
44
  with open(index_path, "rb") as f:
45
+ faiss_index = faiss.read_index(f)
46
  print("Loaded FAISS index from faiss_index.pkl")
47
  else:
48
  # Create a new FAISS index
 
50
  nlist = 100 # Number of clusters (for IVF)
51
  quantizer = faiss.IndexFlatL2(d) # This is the quantizer for IVF
52
  faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
53
+ faiss_index.train(np.zeros((nlist, d)).astype(np.float32)) # Train the index with dummy data
54
 
55
  def extract_text_from_pdf(pdf_path):
56
  text = ""
 
94
  embeddings = embedding_model.encode(sentences)
95
 
96
  for embedding in embeddings:
97
+ faiss_index.add(np.array([embedding]).astype(np.float32)) # Add each embedding individually
98
 
99
  except Exception as e:
100
  print(f"Error processing file '{file.name}': {e}")
 
139
  query_button.click(fn=process_and_query, inputs=[query], outputs=query_output)
140
 
141
  demo.launch()