Chris4K committed on
Commit
cf6524f
·
verified ·
1 Parent(s): 939af78

Update services/pdf_service.py

Browse files
Files changed (1) hide show
  1. services/pdf_service.py +10 -6
services/pdf_service.py CHANGED
@@ -121,30 +121,34 @@ class PDFService:
121
  top_k: int = 5,
122
  min_score: float = 0.5
123
  ) -> List[Dict[str, Any]]:
124
- """Search indexed PDFs"""
125
  print("--------------------------- query ----------------------------------")
126
  print(query)
127
  if not self.index or not self.chunks:
128
  await self.index_pdfs()
129
-
130
  try:
131
  # Create query embedding
132
  query_embedding = self.embedder.encode([query], convert_to_tensor=True)
133
  query_embedding_np = query_embedding.cpu().detach().numpy()
 
134
 
135
  # Search in FAISS index
136
  distances, indices = self.index.search(query_embedding_np, top_k)
 
 
137
 
138
  # Process results
139
  results = []
140
  for i, idx in enumerate(indices[0]):
141
  if idx >= len(self.chunks):
142
  continue # Skip invalid indices
143
-
144
- score = 1 - distances[0][i] # Calculate similarity score
 
145
  if score < min_score:
146
  continue # Skip low scores
147
-
148
  chunk = self.chunks[idx].copy()
149
  chunk['score'] = score
150
  results.append(chunk)
@@ -156,7 +160,7 @@ class PDFService:
156
  print(results)
157
 
158
  return results[:top_k]
159
-
160
  except Exception as e:
161
  logger.error(f"Error searching PDFs: {e}")
162
  raise
 
121
  top_k: int = 5,
122
  min_score: float = 0.5
123
  ) -> List[Dict[str, Any]]:
124
+ """Search indexed PDFs with debug logs"""
125
  print("--------------------------- query ----------------------------------")
126
  print(query)
127
  if not self.index or not self.chunks:
128
  await self.index_pdfs()
129
+
130
  try:
131
  # Create query embedding
132
  query_embedding = self.embedder.encode([query], convert_to_tensor=True)
133
  query_embedding_np = query_embedding.cpu().detach().numpy()
134
+ print("Query Embedding Shape:", query_embedding_np.shape)
135
 
136
  # Search in FAISS index
137
  distances, indices = self.index.search(query_embedding_np, top_k)
138
+ print("Distances:", distances)
139
+ print("Indices:", indices)
140
 
141
  # Process results
142
  results = []
143
  for i, idx in enumerate(indices[0]):
144
  if idx >= len(self.chunks):
145
  continue # Skip invalid indices
146
+
147
+ score = 1 - distances[0][i] # Convert distance to similarity score
148
+ print(f"Chunk Index: {idx}, Distance: {distances[0][i]}, Score: {score}")
149
  if score < min_score:
150
  continue # Skip low scores
151
+
152
  chunk = self.chunks[idx].copy()
153
  chunk['score'] = score
154
  results.append(chunk)
 
160
  print(results)
161
 
162
  return results[:top_k]
163
+
164
  except Exception as e:
165
  logger.error(f"Error searching PDFs: {e}")
166
  raise