NaimaAqeel committed on
Commit
124e62a
·
verified ·
1 Parent(s): 56ec544

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -10,6 +10,7 @@ from nltk.tokenize import sent_tokenize
10
  import torch
11
  import pickle
12
  import nltk
 
13
 
14
  # Ensure NLTK resources are downloaded
15
  try:
@@ -43,7 +44,11 @@ if os.path.exists(index_path):
43
  faiss_index = pickle.load(f)
44
  print("Loaded FAISS index from faiss_index.pkl")
45
  else:
46
- faiss_index = FAISS()
 
 
 
 
47
 
48
  def extract_text_from_pdf(pdf_path):
49
  text = ""
@@ -86,15 +91,15 @@ def upload_files(files):
86
  sentences = preprocess_text(text)
87
  embeddings = embedding_model.encode(sentences)
88
 
89
- for sentence, embedding in zip(sentences, embeddings):
90
- faiss_index.add_sentence(sentence, embedding)
91
 
92
  except Exception as e:
93
  print(f"Error processing file '{file.name}': {e}")
94
  return {"error": str(e)}
95
 
96
- with open(index_path, "wb") as f:
97
- pickle.dump(faiss_index, f)
98
 
99
  return {"message": "Files processed successfully"}
100
 
 
10
  import torch
11
  import pickle
12
  import nltk
13
+ import faiss
14
 
15
  # Ensure NLTK resources are downloaded
16
  try:
 
44
  faiss_index = pickle.load(f)
45
  print("Loaded FAISS index from faiss_index.pkl")
46
  else:
47
+ # Create a new FAISS index
48
+ d = embedding_model.get_sentence_embedding_dimension() # Dimension of the embeddings
49
+ nlist = 100 # Number of clusters (for IVF)
50
+ quantizer = faiss.IndexFlatL2(d) # This is the quantizer for IVF
51
+ faiss_index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
52
 
53
  def extract_text_from_pdf(pdf_path):
54
  text = ""
 
91
  sentences = preprocess_text(text)
92
  embeddings = embedding_model.encode(sentences)
93
 
94
+ for embedding in embeddings:
95
+ faiss_index.add(np.array([embedding])) # Add each embedding individually
96
 
97
  except Exception as e:
98
  print(f"Error processing file '{file.name}': {e}")
99
  return {"error": str(e)}
100
 
101
+ # Save the updated index
102
+ faiss.write_index(faiss_index, index_path)
103
 
104
  return {"message": "Files processed successfully"}
105