Spaces:
Running
Running
SUBHRAJIT MOHANTY
committed on
Commit
·
5d2f302
1
Parent(s):
8a3e144
app.py updated
Browse files
app.py
CHANGED
@@ -65,8 +65,8 @@ class Config:
|
|
65 |
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
66 |
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "documents")
|
67 |
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
68 |
-
TOP_K = int(os.getenv("TOP_K", "5"))
|
69 |
-
SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.7"))
|
70 |
DEVICE = os.getenv("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
|
71 |
|
72 |
class ApplicationState:
|
@@ -299,9 +299,13 @@ class DocumentManager:
|
|
299 |
try:
|
300 |
await self._ensure_collection_exists()
|
301 |
|
|
|
|
|
302 |
# Generate query embedding
|
303 |
query_embedding = await self.embedding_service.get_query_embedding(query)
|
304 |
|
|
|
|
|
305 |
# Search in Qdrant
|
306 |
search_results = await self.qdrant_client.search(
|
307 |
collection_name=self.collection_name,
|
@@ -310,22 +314,29 @@ class DocumentManager:
|
|
310 |
score_threshold=min_score
|
311 |
)
|
312 |
|
|
|
|
|
313 |
# Format results
|
314 |
results = []
|
315 |
-
for result in search_results:
|
|
|
|
|
|
|
316 |
results.append({
|
317 |
"score": result.score,
|
318 |
-
"text":
|
319 |
"file_path": result.payload.get("file_path", ""),
|
320 |
"document_id": result.payload.get("document_id", ""),
|
321 |
"chunk_index": result.payload.get("chunk_index", 0)
|
322 |
})
|
323 |
|
324 |
-
print(f"✓ Found {len(results)} results for query: '{query}'")
|
325 |
return results
|
326 |
|
327 |
except Exception as e:
|
328 |
print(f"Error searching: {e}")
|
|
|
|
|
329 |
return []
|
330 |
|
331 |
async def list_documents(self) -> List[Dict[str, Any]]:
|
@@ -409,13 +420,31 @@ class RAGService:
|
|
409 |
print("Error: Document manager is not initialized")
|
410 |
return []
|
411 |
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
# Use the document manager's search functionality
|
413 |
results = await app_state.document_manager.search_documents(
|
414 |
query=query,
|
415 |
limit=top_k,
|
416 |
-
min_score=
|
417 |
)
|
418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
return results
|
420 |
|
421 |
except Exception as e:
|
|
|
65 |
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
|
66 |
COLLECTION_NAME = os.getenv("COLLECTION_NAME", "documents")
|
67 |
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
|
68 |
+
TOP_K = int(os.getenv("TOP_K", "10")) # Increased from 5
|
69 |
+
SIMILARITY_THRESHOLD = float(os.getenv("SIMILARITY_THRESHOLD", "0.1")) # Lowered from 0.7
|
70 |
DEVICE = os.getenv("DEVICE", "cuda" if torch.cuda.is_available() else "cpu")
|
71 |
|
72 |
class ApplicationState:
|
|
|
299 |
try:
|
300 |
await self._ensure_collection_exists()
|
301 |
|
302 |
+
print(f"Document Search - Query: '{query}', Limit: {limit}, Min Score: {min_score}")
|
303 |
+
|
304 |
# Generate query embedding
|
305 |
query_embedding = await self.embedding_service.get_query_embedding(query)
|
306 |
|
307 |
+
print(f"Document Search - Generated embedding vector of size: {len(query_embedding)}")
|
308 |
+
|
309 |
# Search in Qdrant
|
310 |
search_results = await self.qdrant_client.search(
|
311 |
collection_name=self.collection_name,
|
|
|
314 |
score_threshold=min_score
|
315 |
)
|
316 |
|
317 |
+
print(f"Document Search - Qdrant returned {len(search_results)} results")
|
318 |
+
|
319 |
# Format results
|
320 |
results = []
|
321 |
+
for i, result in enumerate(search_results):
|
322 |
+
content = result.payload.get("content", result.payload.get("chunk_text", ""))
|
323 |
+
print(f"Document Search - Result {i+1}: Score={result.score:.4f}, Content preview: {content[:100]}...")
|
324 |
+
|
325 |
results.append({
|
326 |
"score": result.score,
|
327 |
+
"text": content,
|
328 |
"file_path": result.payload.get("file_path", ""),
|
329 |
"document_id": result.payload.get("document_id", ""),
|
330 |
"chunk_index": result.payload.get("chunk_index", 0)
|
331 |
})
|
332 |
|
333 |
+
print(f"✓ Document Search - Found {len(results)} results for query: '{query}'")
|
334 |
return results
|
335 |
|
336 |
except Exception as e:
|
337 |
print(f"Error searching: {e}")
|
338 |
+
import traceback
|
339 |
+
traceback.print_exc()
|
340 |
return []
|
341 |
|
342 |
async def list_documents(self) -> List[Dict[str, Any]]:
|
|
|
420 |
print("Error: Document manager is not initialized")
|
421 |
return []
|
422 |
|
423 |
+
# Use a lower similarity threshold for RAG to get more results
|
424 |
+
# Try multiple thresholds if needed
|
425 |
+
min_score = 0.1 # Lower threshold for RAG
|
426 |
+
|
427 |
+
print(f"RAG Search - Query: '{query}', Limit: {top_k}, Min Score: {min_score}")
|
428 |
+
|
429 |
# Use the document manager's search functionality
|
430 |
results = await app_state.document_manager.search_documents(
|
431 |
query=query,
|
432 |
limit=top_k,
|
433 |
+
min_score=min_score
|
434 |
)
|
435 |
|
436 |
+
print(f"RAG Search - Found {len(results)} results")
|
437 |
+
|
438 |
+
# If no results with low threshold, try even lower
|
439 |
+
if not results:
|
440 |
+
print("No results with min_score=0.1, trying with min_score=0.0")
|
441 |
+
results = await app_state.document_manager.search_documents(
|
442 |
+
query=query,
|
443 |
+
limit=top_k,
|
444 |
+
min_score=0.0
|
445 |
+
)
|
446 |
+
print(f"RAG Search - Found {len(results)} results with min_score=0.0")
|
447 |
+
|
448 |
return results
|
449 |
|
450 |
except Exception as e:
|