Chris4K committed on
Commit
939af78
·
verified ·
1 Parent(s): 4edeecd

Update services/pdf_service.py

Browse files
Files changed (1) hide show
  1. services/pdf_service.py +16 -27
services/pdf_service.py CHANGED
@@ -128,46 +128,35 @@ class PDFService:
128
  await self.index_pdfs()
129
 
130
  try:
131
- # Get query embedding using thread pool
132
- #loop = asyncio.get_running_loop()
133
- #query_embedding = await loop.run_in_executor(
134
- # None,
135
- # lambda: self.embedder.encode(
136
- # [query],
137
- # convert_to_tensor=True
138
- # ).cpu().detach().numpy()
139
- #)
140
-
141
- # Search
142
- #distances, indices = self.index.search(query_embedding, top_k * 2)
143
-
144
  # Create query embedding
145
  query_embedding = self.embedder.encode([query], convert_to_tensor=True)
146
  query_embedding_np = query_embedding.cpu().detach().numpy()
147
-
148
  # Search in FAISS index
149
  distances, indices = self.index.search(query_embedding_np, top_k)
150
-
151
-
152
-
153
  # Process results
154
  results = []
155
  for i, idx in enumerate(indices[0]):
156
- if idx >= len(self.chunks) or distances[0][i] > min_score:
157
- continue
158
-
 
 
 
 
159
  chunk = self.chunks[idx].copy()
160
- chunk['score'] = float(1 - distances[0][i])
161
  results.append(chunk)
162
-
163
  # Sort by score and take top_k
164
  results.sort(key=lambda x: x['score'], reverse=True)
165
-
166
- print("--------------------------- results ----------------------------------")
167
  print(results)
168
-
169
  return results[:top_k]
170
-
171
  except Exception as e:
172
  logger.error(f"Error searching PDFs: {e}")
173
- raise
 
128
  await self.index_pdfs()
129
 
130
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  # Create query embedding
132
  query_embedding = self.embedder.encode([query], convert_to_tensor=True)
133
  query_embedding_np = query_embedding.cpu().detach().numpy()
134
+
135
  # Search in FAISS index
136
  distances, indices = self.index.search(query_embedding_np, top_k)
137
+
 
 
138
  # Process results
139
  results = []
140
  for i, idx in enumerate(indices[0]):
141
+ if idx >= len(self.chunks):
142
+ continue # Skip invalid indices
143
+
144
+ score = 1 - distances[0][i] # Calculate similarity score
145
+ if score < min_score:
146
+ continue # Skip low scores
147
+
148
  chunk = self.chunks[idx].copy()
149
+ chunk['score'] = score
150
  results.append(chunk)
151
+
152
  # Sort by score and take top_k
153
  results.sort(key=lambda x: x['score'], reverse=True)
154
+
155
+ print("--------------------------- results ----------------------------------")
156
  print(results)
157
+
158
  return results[:top_k]
159
+
160
  except Exception as e:
161
  logger.error(f"Error searching PDFs: {e}")
162
+ raise