SUBHRAJIT MOHANTY committed on
Commit
5d2f302
·
1 Parent(s): 8a3e144

app.py updated

Browse files
Files changed (1) hide show
  1. app.py +35 -6
app.py CHANGED
@@ -65,8 +65,8 @@ class Config:
65
  QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
66
  COLLECTION_NAME = os.getenv("COLLECTION_NAME", "documents")
67
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
68
- TOP_K = int(os.getenv("TOP_K", "5"))
69
- SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.7"))
70
  DEVICE = os.getenv("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
71
 
72
  class ApplicationState:
@@ -299,9 +299,13 @@ class DocumentManager:
299
  try:
300
  await self._ensure_collection_exists()
301
 
 
 
302
  # Generate query embedding
303
  query_embedding = await self.embedding_service.get_query_embedding(query)
304
 
 
 
305
  # Search in Qdrant
306
  search_results = await self.qdrant_client.search(
307
  collection_name=self.collection_name,
@@ -310,22 +314,29 @@ class DocumentManager:
310
  score_threshold=min_score
311
  )
312
 
 
 
313
  # Format results
314
  results = []
315
- for result in search_results:
 
 
 
316
  results.append({
317
  "score": result.score,
318
- "text": result.payload.get("content", result.payload.get("chunk_text", "")),
319
  "file_path": result.payload.get("file_path", ""),
320
  "document_id": result.payload.get("document_id", ""),
321
  "chunk_index": result.payload.get("chunk_index", 0)
322
  })
323
 
324
- print(f"✓ Found {len(results)} results for query: '{query}'")
325
  return results
326
 
327
  except Exception as e:
328
  print(f"Error searching: {e}")
 
 
329
  return []
330
 
331
  async def list_documents(self) -> List[Dict[str, Any]]:
@@ -409,13 +420,31 @@ class RAGService:
409
  print("Error: Document manager is not initialized")
410
  return []
411
 
 
 
 
 
 
 
412
  # Use the document manager's search functionality
413
  results = await app_state.document_manager.search_documents(
414
  query=query,
415
  limit=top_k,
416
- min_score=Config.SIMILARITY_THRESHOLD
417
  )
418
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  return results
420
 
421
  except Exception as e:
 
65
  QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
66
  COLLECTION_NAME = os.getenv("COLLECTION_NAME", "documents")
67
  EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
68
+ TOP_K = int(os.getenv("TOP_K", "10")) # Increased from 5
69
+ SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.1")) # Lowered from 0.7
70
  DEVICE = os.getenv("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
71
 
72
  class ApplicationState:
 
299
  try:
300
  await self._ensure_collection_exists()
301
 
302
+ print(f"Document Search - Query: '{query}', Limit: {limit}, Min Score: {min_score}")
303
+
304
  # Generate query embedding
305
  query_embedding = await self.embedding_service.get_query_embedding(query)
306
 
307
+ print(f"Document Search - Generated embedding vector of size: {len(query_embedding)}")
308
+
309
  # Search in Qdrant
310
  search_results = await self.qdrant_client.search(
311
  collection_name=self.collection_name,
 
314
  score_threshold=min_score
315
  )
316
 
317
+ print(f"Document Search - Qdrant returned {len(search_results)} results")
318
+
319
  # Format results
320
  results = []
321
+ for i, result in enumerate(search_results):
322
+ content = result.payload.get("content", result.payload.get("chunk_text", ""))
323
+ print(f"Document Search - Result {i+1}: Score={result.score:.4f}, Content preview: {content[:100]}...")
324
+
325
  results.append({
326
  "score": result.score,
327
+ "text": content,
328
  "file_path": result.payload.get("file_path", ""),
329
  "document_id": result.payload.get("document_id", ""),
330
  "chunk_index": result.payload.get("chunk_index", 0)
331
  })
332
 
333
+ print(f"✓ Document Search - Found {len(results)} results for query: '{query}'")
334
  return results
335
 
336
  except Exception as e:
337
  print(f"Error searching: {e}")
338
+ import traceback
339
+ traceback.print_exc()
340
  return []
341
 
342
  async def list_documents(self) -> List[Dict[str, Any]]:
 
420
  print("Error: Document manager is not initialized")
421
  return []
422
 
423
+ # Use a lower similarity threshold for RAG to get more results
424
+ # Try multiple thresholds if needed
425
+ min_score = 0.1 # Lower threshold for RAG
426
+
427
+ print(f"RAG Search - Query: '{query}', Limit: {top_k}, Min Score: {min_score}")
428
+
429
  # Use the document manager's search functionality
430
  results = await app_state.document_manager.search_documents(
431
  query=query,
432
  limit=top_k,
433
+ min_score=min_score
434
  )
435
 
436
+ print(f"RAG Search - Found {len(results)} results")
437
+
438
+ # If no results with low threshold, try even lower
439
+ if not results:
440
+ print("No results with min_score=0.1, trying with min_score=0.0")
441
+ results = await app_state.document_manager.search_documents(
442
+ query=query,
443
+ limit=top_k,
444
+ min_score=0.0
445
+ )
446
+ print(f"RAG Search - Found {len(results)} results with min_score=0.0")
447
+
448
  return results
449
 
450
  except Exception as e: