Spaces:

quantumbit
/

rag-bajaj

Sleeping

App Files Files Community

quantumbit committed 12 days ago

Commit

c1a4784

verified ·

1 Parent(s): 3fe7b63

Delete RAG

Browse files

Files changed (10) hide show

RAG/__init__.py +0 -1
RAG/advanced_rag_processor.py +0 -169
RAG/rag_embeddings/.gitkeep +0 -0
RAG/rag_modules/__init__.py +0 -1
RAG/rag_modules/answer_generator.py +0 -97
RAG/rag_modules/context_manager.py +0 -81
RAG/rag_modules/embedding_manager.py +0 -42
RAG/rag_modules/query_expansion.py +0 -128
RAG/rag_modules/reranking_manager.py +0 -63
RAG/rag_modules/search_manager.py +0 -334

RAG/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # RAG Package

RAG/advanced_rag_processor.py DELETED Viewed

@@ -1,169 +0,0 @@
-"""
-Advanced RAG Processor - Modular Version
-Orchestrates all RAG components for document question answering.
-Version: 3.0 - Modular Architecture
-"""
-import time
-from typing import Dict, Tuple
-from pathlib import Path
-# Import all modular components
-from RAG.rag_modules.query_expansion import QueryExpansionManager
-from RAG.rag_modules.embedding_manager import EmbeddingManager
-from RAG.rag_modules.search_manager import SearchManager
-from RAG.rag_modules.reranking_manager import RerankingManager
-from RAG.rag_modules.context_manager import ContextManager
-from RAG.rag_modules.answer_generator import AnswerGenerator
-from LLM.llm_handler import llm_handler
-from config.config import OUTPUT_DIR, TOP_K
-class AdvancedRAGProcessor:
-    """
-    Coordinator for the modular RAG question-answering pipeline.
-    Runs query expansion, hybrid search, reranking, context creation and
-    answer generation in sequence, recording how long each stage takes.
-    """
-    def __init__(self):
-        """Create every pipeline manager and keep a reference to the shared LLM handler."""
-        self.base_db_path = Path(OUTPUT_DIR)
-        # Initialize all managers
-        print("🚀 Initializing Advanced RAG Processor (Modular)...")
-        # Core components; the search manager is handed the embedding manager
-        self.embedding_manager = EmbeddingManager()
-        self.query_expansion_manager = QueryExpansionManager()
-        self.search_manager = SearchManager(self.embedding_manager)
-        self.reranking_manager = RerankingManager()
-        self.context_manager = ContextManager()
-        self.answer_generator = AnswerGenerator()
-        # Keep reference to LLM handler for info
-        self.llm_handler = llm_handler
-        print(f"✅ Advanced RAG Processor initialized with {self.llm_handler.provider.upper()} LLM")
-        print("📦 All modules loaded successfully:")
-        print("   🔄 Query Expansion Manager")
-        print("   🧠 Embedding Manager")
-        print("   🔍 Search Manager (Hybrid)")
-        print("   🎯 Reranking Manager")
-        print("   📝 Context Manager")
-        print("   💬 Answer Generator")
-    @staticmethod
-    def _record_stage(timings, stage, started_at, logger=None, request_id=None):
-        """Store the elapsed time for ``stage`` in ``timings``, log it when possible, and return it."""
-        elapsed = time.time() - started_at
-        timings[stage] = elapsed
-        # A stage is only logged when both a logger and a request ID are available
-        if logger and request_id:
-            logger.log_pipeline_stage(request_id, stage, elapsed)
-        return elapsed
-    async def answer_question(self, question: str, doc_id: str, logger=None, request_id: str = None) -> Tuple[str, Dict[str, float]]:
-        """
-        Answer a question using advanced RAG techniques with detailed timing.
-        Args:
-            question: The question to answer
-            doc_id: Document ID to search in
-            logger: Optional logger; it is called as log_pipeline_stage(request_id, stage, seconds)
-            request_id: Optional request ID for logging (stages are logged only when both are given)
-        Returns:
-            Tuple of (answer, timing_breakdown). When the document's collection cannot be
-            opened, or the search returns nothing, a fixed message is returned with the
-            timings gathered so far. On any other failure the message embeds the error
-            text and the timings gain an 'error_time' entry.
-        """
-        timings = {}
-        overall_start = time.time()
-        try:
-            # Check if collection exists; any failure is treated as "document not processed"
-            collection_name = f"{doc_id}_collection"
-            try:
-                self.search_manager.get_qdrant_client(doc_id).get_collection(collection_name)
-            except Exception:
-                return "I don't have information about this document. Please ensure the document has been processed.", timings
-            print(f"🚀 Advanced RAG processing for: {question[:100]}...")
-            # Step 1: Query Expansion
-            step_start = time.time()
-            expanded_queries = await self.query_expansion_manager.expand_query(question)
-            expansion_time = self._record_stage(timings, 'query_expansion', step_start, logger, request_id)
-            # Step 2: Hybrid Search with Fusion
-            step_start = time.time()
-            search_results = await self.search_manager.hybrid_search(expanded_queries, doc_id, TOP_K)
-            search_time = self._record_stage(timings, 'hybrid_search', step_start, logger, request_id)
-            if not search_results:
-                return "I couldn't find relevant information to answer your question.", timings
-            # Step 3: Reranking (against the original question, not the sub-queries)
-            step_start = time.time()
-            reranked_results = await self.reranking_manager.rerank_results(question, search_results)
-            rerank_time = self._record_stage(timings, 'reranking', step_start, logger, request_id)
-            # Step 4: Multi-perspective Context Creation
-            step_start = time.time()
-            context = self.context_manager.create_enhanced_context(question, reranked_results)
-            context_time = self._record_stage(timings, 'context_creation', step_start, logger, request_id)
-            # Step 5: Enhanced Answer Generation
-            step_start = time.time()
-            answer = await self.answer_generator.generate_enhanced_answer(question, context, expanded_queries)
-            generation_time = self._record_stage(timings, 'llm_generation', step_start, logger, request_id)
-            # Calculate total time
-            total_time = time.time() - overall_start
-            timings['total_pipeline'] = total_time
-            print(f"\n✅ Advanced RAG processing completed in {total_time:.4f}s")
-            print(f"   🔍 Query expansion: {expansion_time:.4f}s")
-            print(f"   🔎 Hybrid search: {search_time:.4f}s")
-            print(f"   🎯 Reranking: {rerank_time:.4f}s")
-            print(f"   📝 Context creation: {context_time:.4f}s")
-            print(f"   💬 LLM generation: {generation_time:.4f}s")
-            return answer, timings
-        except Exception as e:
-            # Top-level boundary: report the failure to the caller instead of raising
-            error_time = time.time() - overall_start
-            timings['error_time'] = error_time
-            print(f"❌ Error in advanced RAG processing: {str(e)}")
-            return f"I encountered an error while processing your question: {str(e)}", timings
-    def cleanup(self):
-        """Cleanup manager resources by delegating to the search manager."""
-        print("🧹 Cleaning up Advanced RAG processor resources...")
-        # Cleanup search manager (which has the most resources)
-        self.search_manager.cleanup()
-        print("✅ Advanced RAG cleanup completed")
-    def get_system_info(self) -> Dict:
-        """Return a dict with the pipeline version, LLM provider/model, module names and base DB path."""
-        return {
-            "version": "3.0 - Modular",
-            "llm_provider": self.llm_handler.provider,
-            "llm_model": self.llm_handler.model_name,
-            "modules": [
-                "QueryExpansionManager",
-                "EmbeddingManager",
-                "SearchManager",
-                "RerankingManager",
-                "ContextManager",
-                "AnswerGenerator"
-            ],
-            "base_db_path": str(self.base_db_path)
-        }

RAG/rag_embeddings/.gitkeep DELETED Viewed

File without changes

RAG/rag_modules/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- # RAG Modules Package

RAG/rag_modules/answer_generator.py DELETED Viewed

@@ -1,97 +0,0 @@
-"""
-Answer Generation Module for Advanced RAG
-Handles LLM-based answer generation with enhanced prompting.
-"""
-from typing import List
-from LLM.llm_handler import llm_handler
-from config.config import TEMPERATURE, MAX_TOKENS
-class AnswerGenerator:
-    """Produces the final answer by prompting the shared LLM handler."""
-    def __init__(self):
-        """Bind the module-level LLM handler to this instance."""
-        self.llm_handler = llm_handler
-        print("✅ Answer Generator initialized")
-    async def generate_enhanced_answer(self, original_question: str, context: str, expanded_queries: List[str]) -> str:
-        """
-        Ask the LLM to answer ``original_question`` from the retrieved ``context``.
-        ``expanded_queries`` is accepted but not used: only the original question
-        is sent to the model. Returns the stripped model output, or a fixed
-        error sentence when generation raises.
-        """
-        system_prompt = """
-You are an expert AI assistant specializing in document analysis and policy-related question answering. You have access to relevant document excerpts and must respond only based on this information. You are designed specifically for analyzing official documents and answering user queries related to them.
-STRICT RULES AND RESPONSE CONDITIONS:
-    Irrelevant/Out-of-Scope Queries (e.g., programming help, general product info, coding tasks):
-    Respond EXACTLY:
-        "I cannot help with that. I am designed only to answer queries related to the provided document excerpts."
-    Illegal or Prohibited Requests (e.g., forgery, fraud, bypassing regulations):
-    Respond CLEARLY that the request is illegal. Example format:
-        "This request is illegal and cannot be supported. According to the applicable regulations in the document, [explain why it's illegal if mentioned]. Engaging in such activity may lead to legal consequences."
-        If illegality is not explicitly in the documents, use:
-        "This request involves illegal activity and is against policy. I cannot assist with this."
-    Nonexistent Concepts, Schemes, or Entities:
-    Respond by stating the concept does not exist and offer clarification by pointing to related valid information. Example:
-        "There is no mention of such a scheme in the document. However, the following related schemes are described: [summarize relevant ones]."
-    Valid Topics with Missing or Incomplete Information:
-    Respond that the exact answer is unavailable, then provide all related details and recommend official contact. Example:
-        "The exact information is not available in the provided document. However, here is what is relevant: [details]. For further clarification, you may contact: [official contact details if included in the document]."
-    Valid Questions Answerable from Document:
-    Provide a concise and accurate answer with clear reference to the document content. Also include any related notes that might aid understanding. Example:
-        "[Answer]. According to the policy document, [quote/summary from actual document content]."
-GENERAL ANSWERING RULES:
-    Use ONLY the provided document excerpts. Never use external knowledge.
-    Be concise: 5-6 sentences per answer, with all the details available for that particular query.
-    Start directly with the answer. Do not restate or rephrase the question.
-    Never speculate or elaborate beyond what is explicitly stated.
-    When referencing information, mention "according to the document" or "as stated in the policy" rather than using internal labels like "Query X Doc Y".
-    Do not reference internal organizational labels like [Query 1 Doc 2] or [Relevance: X.XX] - these are for processing only.
-    Focus on the actual document content and policy information when providing answers.
-The user may phrase questions in various ways — always infer the intent, apply the rules above, and respond accordingly.
-"""
-        # The user turn carries the original question followed by the retrieved excerpts
-        user_prompt = (
-            f"Question: {original_question}\n"
-            "Document Excerpts:\n"
-            f"{context}\n"
-            "Provide a comprehensive answer based on the document excerpts above:"
-        )
-        try:
-            raw_answer = await self.llm_handler.generate_text(
-                system_prompt=system_prompt,
-                user_prompt=user_prompt,
-                temperature=TEMPERATURE,
-                max_tokens=MAX_TOKENS
-            )
-            return raw_answer.strip()
-        except Exception as exc:
-            # Best-effort: surface a generic message rather than propagating the failure
-            provider_label = self.llm_handler.provider.upper()
-            print(f"❌ Error generating enhanced response with {provider_label}: {str(exc)}")
-            return "I encountered an error while generating the response."

RAG/rag_modules/context_manager.py DELETED Viewed

@@ -1,81 +0,0 @@
-"""
-Context Management Module for Advanced RAG
-Handles context creation and management for LLM generation.
-"""
-from typing import List, Dict
-from collections import defaultdict
-from config.config import MAX_CONTEXT_LENGTH
-class ContextManager:
-    """Builds the context string handed to the LLM from ranked search results."""
-    def __init__(self):
-        """Announce initialization; no state is stored on the instance."""
-        print("✅ Context Manager initialized")
-    def create_enhanced_context(self, question: str, results: List[Dict], max_length: int = MAX_CONTEXT_LENGTH) -> str:
-        """
-        Create enhanced context giving each expanded query a fair share of chunks.
-        Args:
-            question: The user's question (not used by the current implementation)
-            results: Result dicts; each needs a 'payload' dict and may carry
-                'contributing_queries', 'rerank_score', 'final_score' and 'score'
-            max_length: Budget, in characters, for the summed length of the labelled chunks
-        Returns:
-            The selected chunks, each prefixed with a "[Query X Doc Y]" label, joined by
-            newlines; an empty string when ``results`` is empty
-        """
-        # Group results by expanded query index
-        query_to_chunks = defaultdict(list)
-        for i, result in enumerate(results):
-            contributing = result.get('contributing_queries')
-            if contributing:
-                # Attribute the chunk to its highest scoring contributing query
-                best_contrib = max(contributing, key=lambda cq: cq.get('semantic_score', cq.get('bm25_score', 0)))
-                query_idx = best_contrib['query_idx']
-            else:
-                query_idx = 0  # fallback to first query
-            query_to_chunks[query_idx].append((i, result))
-        num_queries = len(query_to_chunks)
-        if num_queries == 0:
-            return ""
-        # Sort chunks within each query by their relevance scores
-        for q_idx in query_to_chunks:
-            query_to_chunks[q_idx].sort(key=lambda x: x[1].get('rerank_score', x[1].get('final_score', x[1].get('score', 0))), reverse=True)
-        context_parts = []
-        current_length = 0
-        added_chunks = set()
-        # Split the chunk budget evenly; the remainder goes one-each to the first queries
-        chunks_per_query, extra_chunks = divmod(len(results), num_queries)
-        print(f"📊 Context Creation: {num_queries} queries, {chunks_per_query} chunks per query (+{extra_chunks} extra)")
-        for position, q_idx in enumerate(sorted(query_to_chunks)):
-            # Extra slots are allotted by position among the queries that actually have
-            # chunks, so gaps in the query indexes (e.g. only queries 1 and 2) lose no slot
-            query_chunk_limit = chunks_per_query + (1 if position < extra_chunks else 0)
-            query_chunks_added = 0
-            print(f"   Query {q_idx+1}: Adding up to {query_chunk_limit} chunks")
-            for i, result in query_to_chunks[q_idx]:
-                if query_chunks_added >= query_chunk_limit:
-                    break
-                text = result['payload'].get('text', '')
-                relevance_info = ""
-                if 'rerank_score' in result:
-                    relevance_info = f" [Relevance: {result['rerank_score']:.2f}]"
-                elif 'final_score' in result:
-                    relevance_info = f" [Score: {result['final_score']:.2f}]"
-                doc_text = f"[Query {q_idx+1} Doc {len(added_chunks)+1}]{relevance_info}\n{text}\n"
-                if current_length + len(doc_text) > max_length:
-                    # Stops this query only; later queries may still fit shorter chunks
-                    print(f"   ⚠️ Context length limit reached at {current_length} chars")
-                    break
-                context_parts.append(doc_text)
-                current_length += len(doc_text)
-                added_chunks.add(i)
-                query_chunks_added += 1
-            print(f"   Query {q_idx+1}: Added {query_chunks_added} chunks")
-        print(f"📝 Final context: {len(added_chunks)} chunks, {current_length} chars")
-        return "\n".join(context_parts)

RAG/rag_modules/embedding_manager.py DELETED Viewed

@@ -1,42 +0,0 @@
-"""
-Embedding Management Module for Advanced RAG
-Handles text encoding and embedding operations.
-"""
-import asyncio
-from typing import List
-from sentence_transformers import SentenceTransformer
-from config.config import EMBEDDING_MODEL
-class EmbeddingManager:
-    """Manages text embeddings for RAG operations."""
-    def __init__(self):
-        """Initialize the embedding manager and load the model immediately."""
-        self.embedding_model = None
-        self._init_embedding_model()
-    def _init_embedding_model(self):
-        """Instantiate the SentenceTransformer named by EMBEDDING_MODEL."""
-        print(f"🔄 Loading embedding model: {EMBEDDING_MODEL}")
-        self.embedding_model = SentenceTransformer(EMBEDDING_MODEL)
-        print("✅ Embedding model loaded successfully")
-    async def _run_blocking(self, func):
-        """Run the blocking callable ``func`` in the loop's default executor and return its result."""
-        # get_running_loop() is the preferred way to reach the loop from inside a
-        # coroutine; get_event_loop() has policy-dependent behavior there
-        return await asyncio.get_running_loop().run_in_executor(None, func)
-    async def encode_query(self, query: str) -> List[float]:
-        """Encode a single query into a normalized float32 embedding, returned as a list."""
-        def encode_sync():
-            # The model is called with a one-element batch; take the only row
-            embedding = self.embedding_model.encode([query], normalize_embeddings=True)
-            return embedding[0].astype("float32").tolist()
-        return await self._run_blocking(encode_sync)
-    async def encode_texts(self, texts: List[str]) -> List[List[float]]:
-        """Encode multiple texts into normalized float32 embeddings, one list per text."""
-        def encode_sync():
-            embeddings = self.embedding_model.encode(texts, normalize_embeddings=True)
-            return [emb.astype("float32").tolist() for emb in embeddings]
-        return await self._run_blocking(encode_sync)

RAG/rag_modules/query_expansion.py DELETED Viewed

@@ -1,128 +0,0 @@
-"""
-Query Expansion Module for Advanced RAG
-Handles breaking down complex queries into focused sub-queries for better information retrieval.
-"""
-import re
-import time
-from typing import List
-from LLM.llm_handler import llm_handler
-from config.config import ENABLE_QUERY_EXPANSION, QUERY_EXPANSION_COUNT
-class QueryExpansionManager:
-    """Manages query expansion for better information retrieval."""
-    # Matches leading list markers only ("1.", "2)", "-", "*"), possibly repeated.
-    # The (?!\d) guard keeps decimals such as "3.5%" intact, and digits not followed
-    # by "." or ")" (e.g. "30-day") are never treated as numbering.
-    _LIST_MARKER_RE = re.compile(r'^\s*(?:(?:\d+[\.\)](?!\d)|[\-\*])\s*)+')
-    def __init__(self):
-        """Initialize the query expansion manager with the shared LLM handler."""
-        self.llm_handler = llm_handler
-        print("✅ Query Expansion Manager initialized")
-    @classmethod
-    def _clean_sub_query(cls, line: str) -> str:
-        """Strip surrounding whitespace and any leading numbering/bullet marker from ``line``."""
-        return cls._LIST_MARKER_RE.sub('', line.strip()).strip()
-    async def expand_query(self, original_query: str) -> List[str]:
-        """
-        Break complex queries into focused parts for better information retrieval.
-        Returns ``[original_query]`` when expansion is disabled or the LLM call fails.
-        Otherwise returns exactly QUERY_EXPANSION_COUNT queries: the LLM's sub-queries,
-        or the original query repeated when the LLM produced too few usable lines.
-        """
-        if not ENABLE_QUERY_EXPANSION:
-            return [original_query]
-        try:
-            expansion_prompt = f"""Analyze this question and break it down into exactly {QUERY_EXPANSION_COUNT} specific, focused sub-questions that can be searched independently in a document. Each sub-question should target a distinct piece of information or process.
-For complex questions with multiple parts, identify:
-1. Different processes or procedures mentioned
-2. Specific information requests (emails, contact details, forms, etc.)
-3. Different entities or subjects involved
-4. Sequential steps that might be documented separately
-Original question: {original_query}
-Break this into exactly {QUERY_EXPANSION_COUNT} focused search queries that target different aspects:
-Examples of good breakdown:
-- "What is the dental claim submission process?"
-- "How to update surname/name in policy records?"
-- "What are the company contact details and grievance email?"
-Provide only {QUERY_EXPANSION_COUNT} focused sub-questions, one per line, without numbering or additional formatting:"""
-            response = await self.llm_handler.generate_simple(
-                expansion_prompt,
-                temperature=0.3,  # Lower temperature for more focused breakdown
-                max_tokens=300    # More tokens for detailed breakdown
-            )
-            expanded_queries = []  # Start with empty list - don't include original
-            if response:
-                for line in response.strip().split('\n'):
-                    if len(expanded_queries) >= QUERY_EXPANSION_COUNT:  # Stop when we have enough
-                        break
-                    # Remove any numbering or bullet points that might be added
-                    query = self._clean_sub_query(line)
-                    # Very short lines are treated as noise rather than real sub-queries
-                    if len(query) > 10:
-                        expanded_queries.append(query)
-            # If we don't have enough sub-queries, fall back to using the original
-            if len(expanded_queries) < QUERY_EXPANSION_COUNT:
-                expanded_queries = [original_query] * QUERY_EXPANSION_COUNT
-            # Ensure we have exactly QUERY_EXPANSION_COUNT queries
-            final_queries = expanded_queries[:QUERY_EXPANSION_COUNT]
-            print(f"🔄 Query broken down from 1 complex question to {len(final_queries)} focused sub-queries using {self.llm_handler.provider.upper()}")
-            print("📌 Original query will be used for final LLM generation only")
-            for i, q in enumerate(final_queries):
-                print(f"   Sub-query {i+1}: {q[:80]}...")
-            return final_queries
-        except Exception as e:
-            # Best-effort: retrieval proceeds with the unexpanded query
-            print(f"⚠️ Query expansion failed: {e}")
-            return [original_query]
-    def _identify_query_components(self, query: str) -> dict:
-        """
-        Identify different components in a complex query for better breakdown.
-        Returns a dict keyed by component type; each value is a one-element list
-        holding a label when any of that type's keywords occurs in the lower-cased
-        query (substring match), and an empty list otherwise.
-        """
-        # (component key, label to record, keywords that trigger it)
-        keyword_table = (
-            ('processes', 'process identification',
-             ['process', 'procedure', 'steps', 'how to', 'submit', 'apply', 'claim', 'update', 'change', 'enroll']),
-            ('documents', 'document requirements',
-             ['documents', 'forms', 'papers', 'certificate', 'proof', 'evidence', 'requirements']),
-            ('contacts', 'contact information',
-             ['email', 'phone', 'contact', 'grievance', 'customer service', 'support', 'helpline']),
-            ('eligibility', 'eligibility criteria',
-             ['eligibility', 'criteria', 'qualify', 'eligible', 'conditions', 'requirements']),
-            ('timelines', 'timeline information',
-             ['timeline', 'period', 'duration', 'time', 'days', 'months', 'waiting', 'grace']),
-            ('benefits', 'benefit details',
-             ['benefits', 'coverage', 'limits', 'amount', 'reimbursement', 'claim amount']),
-        )
-        query_lower = query.lower()
-        components = {}
-        for component, label, keywords in keyword_table:
-            matched = any(keyword in query_lower for keyword in keywords)
-            components[component] = [label] if matched else []
-        return components

RAG/rag_modules/reranking_manager.py DELETED Viewed

@@ -1,63 +0,0 @@
-"""
-Reranking Module for Advanced RAG
-Handles result reranking using cross-encoder models.
-"""
-from typing import List, Dict
-from sentence_transformers import CrossEncoder
-from config.config import ENABLE_RERANKING, RERANKER_MODEL, RERANK_TOP_K
-class RerankingManager:
-    """Re-orders search results with a cross-encoder model."""
-    def __init__(self):
-        """Set up the manager; the model is only loaded when ENABLE_RERANKING is set."""
-        self.reranker_model = None
-        if ENABLE_RERANKING:
-            self._init_reranker_model()
-        print("✅ Reranking Manager initialized")
-    def _init_reranker_model(self):
-        """Instantiate the CrossEncoder named by RERANKER_MODEL."""
-        print(f"🔄 Loading reranker model: {RERANKER_MODEL}")
-        self.reranker_model = CrossEncoder(RERANKER_MODEL)
-        print(f"✅ Reranker model loaded successfully")
-    async def rerank_results(self, query: str, search_results: List[Dict]) -> List[Dict]:
-        """
-        Score each result against ``query`` and return the best RERANK_TOP_K.
-        The input list is returned untouched when reranking is disabled, no model
-        is loaded, or there is at most one result. Otherwise every result dict
-        gains 'rerank_score' and 'final_score' (0.3 * original + 0.7 * rerank).
-        If scoring fails, the first RERANK_TOP_K inputs are returned instead.
-        """
-        can_rerank = ENABLE_RERANKING and self.reranker_model and len(search_results) > 1
-        if not can_rerank:
-            return search_results
-        try:
-            # One [query, text] pair per result; text is capped at 512 characters
-            query_doc_pairs = [
-                [query, hit['payload'].get('text', '')[:512]]
-                for hit in search_results
-            ]
-            rerank_scores = self.reranker_model.predict(query_doc_pairs)
-            for position, hit in enumerate(search_results):
-                cross_score = float(rerank_scores[position])
-                hit['rerank_score'] = cross_score
-                # Blend retrieval and cross-encoder scores, favouring the cross-encoder
-                hit['final_score'] = 0.3 * hit.get('score', 0) + 0.7 * cross_score
-            ordered = sorted(search_results, key=lambda hit: hit['final_score'], reverse=True)
-            print(f"🎯 Reranked {len(search_results)} results")
-            return ordered[:RERANK_TOP_K]
-        except Exception as exc:
-            # Best-effort: fall back to the incoming order, still capped at RERANK_TOP_K
-            print(f"⚠️ Reranking failed: {exc}")
-            return search_results[:RERANK_TOP_K]

RAG/rag_modules/search_manager.py DELETED Viewed

@@ -1,334 +0,0 @@
-"""
-Search Module for Advanced RAG
-Handles hybrid search combining BM25 and semantic search with score fusion.
-"""
-import re
-import time
-import numpy as np
-from typing import List, Dict, Any
-from pathlib import Path
-from rank_bm25 import BM25Okapi
-from qdrant_client import QdrantClient
-from config.config import (
-    OUTPUT_DIR, TOP_K, SCORE_THRESHOLD, ENABLE_HYBRID_SEARCH,
-    BM25_WEIGHT, SEMANTIC_WEIGHT, USE_TOTAL_BUDGET_APPROACH
-)
-class SearchManager:
-    """Manages hybrid search operations combining BM25 and semantic search."""
-    def __init__(self, embedding_manager):
-        """Initialize the search manager."""
-        self.embedding_manager = embedding_manager
-        self.base_db_path = Path(OUTPUT_DIR)
-        self.qdrant_clients = {}
-        self.bm25_indexes = {}  # Cache BM25 indexes per document
-        self.document_chunks = {}  # Cache chunks for BM25
-        print("✅ Search Manager initialized")
-    def get_qdrant_client(self, doc_id: str) -> QdrantClient:
-        """Get or create Qdrant client for a specific document."""
-        if doc_id not in self.qdrant_clients:
-            db_path = self.base_db_path / f"{doc_id}_collection.db"
-            if not db_path.exists():
-                raise FileNotFoundError(f"Database not found for document {doc_id}")
-            self.qdrant_clients[doc_id] = QdrantClient(path=str(db_path))
-        return self.qdrant_clients[doc_id]
-    def _load_bm25_index(self, doc_id: str):
-        """Load or create BM25 index for a document."""
-        if doc_id not in self.bm25_indexes:
-            print(f"🔄 Loading BM25 index for {doc_id}")
-            # Get all chunks from Qdrant
-            client = self.get_qdrant_client(doc_id)
-            collection_name = f"{doc_id}_collection"
-            try:
-                # Get all points from collection
-                result = client.scroll(
-                    collection_name=collection_name,
-                    limit=10000,  # Adjust based on your chunk count
-                    with_payload=True,
-                    with_vectors=False
-                )
-                chunks = []
-                chunk_ids = []
-                for point in result[0]:
-                    chunk_text = point.payload.get('text', '')
-                    chunks.append(chunk_text)
-                    chunk_ids.append(point.id)
-                # Tokenize chunks for BM25
-                tokenized_chunks = [self._tokenize_text(chunk) for chunk in chunks]
-                # Create BM25 index
-                self.bm25_indexes[doc_id] = BM25Okapi(tokenized_chunks)
-                self.document_chunks[doc_id] = {
-                    'chunks': chunks,
-                    'chunk_ids': chunk_ids,
-                    'tokenized_chunks': tokenized_chunks
-                }
-                print(f"✅ BM25 index loaded for {doc_id} with {len(chunks)} chunks")
-            except Exception as e:
-                print(f"❌ Error loading BM25 index for {doc_id}: {e}")
-                # Fallback: empty index
-                self.bm25_indexes[doc_id] = BM25Okapi([[]])
-                self.document_chunks[doc_id] = {'chunks': [], 'chunk_ids': [], 'tokenized_chunks': []}
-    def _tokenize_text(self, text: str) -> List[str]:
-        """Simple tokenization for BM25."""
-        # Remove special characters and convert to lowercase
-        text = re.sub(r'[^\w\s]', ' ', text.lower())
-        # Split and filter empty tokens
-        tokens = [token for token in text.split() if len(token) > 2]
-        return tokens
-    async def hybrid_search(self, queries: List[str], doc_id: str, top_k: int = TOP_K) -> List[Dict]:
-        """
-        Perform hybrid search combining BM25 and semantic search.
-        Each query is run against the document's Qdrant collection (semantic)
-        and, when ENABLE_HYBRID_SEARCH is set, against the cached BM25 index.
-        Hits from all queries are merged per point id, keeping the maximum
-        semantic and BM25 score seen for each point, then scored by
-        ``_apply_score_fusion`` and sorted by descending fusion score.
-        With USE_TOTAL_BUDGET_APPROACH and more than one query, ``top_k`` is
-        split across the queries; otherwise every query gets ``top_k``.
-        Args:
-            queries: Query strings to search with.
-            doc_id: Document identifier; selects the Qdrant store and BM25 index.
-            top_k: Maximum number of results returned.
-        Returns:
-            Up to ``top_k`` dicts with the keys ``id``, ``score`` (fusion score),
-            ``payload``, ``semantic_score``, ``bm25_score`` and
-            ``contributing_queries``.
-        Raises:
-            FileNotFoundError: From ``get_qdrant_client`` when the document's
-                database is missing. Failures of an individual semantic or BM25
-                search are caught and printed instead.
-        """
-        collection_name = f"{doc_id}_collection"
-        client = self.get_qdrant_client(doc_id)
-        # Ensure BM25 index is loaded
-        if doc_id not in self.bm25_indexes:
-            self._load_bm25_index(doc_id)
-        # Calculate per-query budget based on approach
-        if USE_TOTAL_BUDGET_APPROACH and len(queries) > 1:
-            per_query_budget = max(1, top_k // len(queries))
-            extra_budget = top_k % len(queries)  # Distribute remaining budget
-            print(f"🎯 Total Budget Approach: Distributing {top_k} candidates across {len(queries)} queries")
-            print(f"   📊 Base budget per query: {per_query_budget}")
-            if extra_budget > 0:
-                print(f"   ➕ Extra budget for first {extra_budget} queries: +1 each")
-        else:
-            per_query_budget = top_k
-            extra_budget = 0
-            print(f"🔍 Per-Query Approach: Each query gets {per_query_budget} candidates")
-        # point_id -> {'semantic_score', 'bm25_score', 'payload', 'fusion_score', 'contributing_queries'}
-        all_candidates = {}
-        query_performance = {}  # Track performance of each sub-query
-        print(f"🔍 Running hybrid search with {len(queries)} focused queries...")
-        for query_idx, query in enumerate(queries):
-            # Counts every accepted hit for this query (semantic and BM25),
-            # including hits for points that are already in all_candidates.
-            query_candidates = 0
-            query_start = time.time()
-            # Calculate this query's budget
-            if USE_TOTAL_BUDGET_APPROACH and len(queries) > 1:
-                # The first `extra_budget` queries absorb the remainder of the split.
-                query_budget = per_query_budget + (1 if query_idx < extra_budget else 0)
-                search_limit = query_budget * 2  # Get extra for better selection
-            else:
-                query_budget = per_query_budget
-                search_limit = query_budget * 2
-            print(f"   Q{query_idx+1} Budget: {query_budget} candidates (searching {search_limit})")
-            # 1. Semantic Search
-            # NOTE(review): this condition is always true, so the semantic search
-            # runs regardless of ENABLE_HYBRID_SEARCH.
-            if ENABLE_HYBRID_SEARCH or not ENABLE_HYBRID_SEARCH:  # Always do semantic
-                try:
-                    query_vector = await self.embedding_manager.encode_query(query)
-                    semantic_results = client.search(
-                        collection_name=collection_name,
-                        query_vector=query_vector,
-                        limit=search_limit,  # Use query-specific limit
-                        score_threshold=SCORE_THRESHOLD
-                    )
-                    # Process semantic results with budget limit
-                    semantic_count = 0
-                    for result in semantic_results:
-                        if USE_TOTAL_BUDGET_APPROACH and semantic_count >= query_budget:
-                            break  # Respect budget limit
-                        point_id = str(result.id)
-                        semantic_score = float(result.score)
-                        if point_id not in all_candidates:
-                            all_candidates[point_id] = {
-                                'semantic_score': 0,
-                                'bm25_score': 0,
-                                'payload': result.payload,
-                                'fusion_score': 0,
-                                'contributing_queries': []
-                            }
-                        # Use max score across queries for semantic, but track which queries contributed
-                        if semantic_score > all_candidates[point_id]['semantic_score']:
-                            all_candidates[point_id]['semantic_score'] = semantic_score
-                        all_candidates[point_id]['contributing_queries'].append({
-                            'query_idx': query_idx,
-                            'query_text': query[:50] + '...' if len(query) > 50 else query,
-                            'semantic_score': semantic_score,
-                            'type': 'semantic'
-                        })
-                        query_candidates += 1
-                        semantic_count += 1
-                except Exception as e:
-                    # A failed semantic search only skips this query's semantic hits.
-                    print(f"⚠️ Semantic search failed for query '{query[:50]}...': {e}")
-            # 2. BM25 Search (if enabled)
-            if ENABLE_HYBRID_SEARCH and doc_id in self.bm25_indexes:
-                try:
-                    tokenized_query = self._tokenize_text(query)
-                    bm25_scores = self.bm25_indexes[doc_id].get_scores(tokenized_query)
-                    # Get top BM25 results with budget consideration
-                    chunk_data = self.document_chunks[doc_id]
-                    # Indices of the highest-scoring chunks, best first.
-                    bm25_top_indices = np.argsort(bm25_scores)[::-1][:search_limit]
-                    # Process BM25 results with budget limit
-                    bm25_count = 0
-                    for idx in bm25_top_indices:
-                        if USE_TOTAL_BUDGET_APPROACH and bm25_count >= query_budget:
-                            break  # Respect budget limit
-                        # Skip indices without a cached chunk id and chunks with a non-positive score.
-                        if idx < len(chunk_data['chunk_ids']) and bm25_scores[idx] > 0:
-                            point_id = str(chunk_data['chunk_ids'][idx])
-                            bm25_score = float(bm25_scores[idx])
-                            if point_id not in all_candidates:
-                                # A point first seen via BM25 gets a payload holding only its text.
-                                all_candidates[point_id] = {
-                                    'semantic_score': 0,
-                                    'bm25_score': 0,
-                                    'payload': {'text': chunk_data['chunks'][idx]},
-                                    'fusion_score': 0,
-                                    'contributing_queries': []
-                                }
-                            # Use max score across queries for BM25, but track which queries contributed
-                            if bm25_score > all_candidates[point_id]['bm25_score']:
-                                all_candidates[point_id]['bm25_score'] = bm25_score
-                            all_candidates[point_id]['contributing_queries'].append({
-                                'query_idx': query_idx,
-                                'query_text': query[:50] + '...' if len(query) > 50 else query,
-                                'bm25_score': bm25_score,
-                                'type': 'bm25'
-                            })
-                            query_candidates += 1
-                            bm25_count += 1
-                except Exception as e:
-                    # A failed BM25 search only skips this query's BM25 hits.
-                    print(f"⚠️ BM25 search failed for query '{query[:50]}...': {e}")
-            # Track query performance with budget info
-            query_time = time.time() - query_start
-            query_performance[query_idx] = {
-                'query': query[:80] + '...' if len(query) > 80 else query,
-                'candidates_found': query_candidates,
-                'budget_allocated': query_budget if USE_TOTAL_BUDGET_APPROACH else 'unlimited',
-                'time': query_time
-            }
-        # 3. Score fusion: _apply_score_fusion fills 'fusion_score' on every candidate in place
-        self._apply_score_fusion(all_candidates)
-        # 4. Sort by fusion score and return top results
-        sorted_candidates = sorted(
-            all_candidates.items(),
-            key=lambda x: x[1]['fusion_score'],
-            reverse=True
-        )
-        # Convert to result format with enhanced metadata
-        hybrid_results = []
-        for point_id, data in sorted_candidates[:top_k]:
-            hybrid_results.append({
-                'id': point_id,
-                'score': data['fusion_score'],
-                'payload': data['payload'],
-                'semantic_score': data['semantic_score'],
-                'bm25_score': data['bm25_score'],
-                'contributing_queries': data['contributing_queries']
-            })
-        # Log performance summary
-        approach_name = "Total Budget" if USE_TOTAL_BUDGET_APPROACH else "Per-Query"
-        print(f"🔍 Hybrid search completed ({approach_name} Approach):")
-        print(f"   📊 {len(all_candidates)} total candidates from {len(queries)} focused queries")
-        print(f"   🎯 Top {len(hybrid_results)} results selected")
-        # Log per-query performance with budget info
-        total_budget_used = 0
-        for idx, perf in query_performance.items():
-            budget_info = f" (budget: {perf['budget_allocated']})" if USE_TOTAL_BUDGET_APPROACH else ""
-            print(f"   Q{idx+1}: {perf['candidates_found']} candidates{budget_info} in {perf['time']:.3f}s")
-            print(f"        Query: {perf['query']}")
-            if USE_TOTAL_BUDGET_APPROACH and isinstance(perf['budget_allocated'], int):
-                total_budget_used += perf['candidates_found']
-        # NOTE(review): 'candidates_found' adds semantic and BM25 hits, each capped at the
-        # query budget separately, so the "used" figure printed below can exceed top_k.
-        if USE_TOTAL_BUDGET_APPROACH:
-            print(f"   💰 Total budget efficiency: {total_budget_used}/{top_k} candidates used")
-        return hybrid_results
-    def _apply_score_fusion(self, candidates: Dict):
-        """Apply advanced score fusion techniques."""
-        if not candidates:
-            return
-        # Normalize scores
-        semantic_scores = [data['semantic_score'] for data in candidates.values() if data['semantic_score'] > 0]
-        bm25_scores = [data['bm25_score'] for data in candidates.values() if data['bm25_score'] > 0]
-        # Min-Max normalization
-        if semantic_scores:
-            sem_min, sem_max = min(semantic_scores), max(semantic_scores)
-            sem_range = sem_max - sem_min if sem_max > sem_min else 1
-        else:
-            sem_min, sem_range = 0, 1
-        if bm25_scores:
-            bm25_min, bm25_max = min(bm25_scores), max(bm25_scores)
-            bm25_range = bm25_max - bm25_min if bm25_max > bm25_min else 1
-        else:
-            bm25_min, bm25_range = 0, 1
-        # Calculate fusion scores
-        for point_id, data in candidates.items():
-            # Normalize scores
-            norm_semantic = (data['semantic_score'] - sem_min) / sem_range if data['semantic_score'] > 0 else 0
-            norm_bm25 = (data['bm25_score'] - bm25_min) / bm25_range if data['bm25_score'] > 0 else 0
-            # Weighted combination
-            if ENABLE_HYBRID_SEARCH:
-                fusion_score = (SEMANTIC_WEIGHT * norm_semantic) + (BM25_WEIGHT * norm_bm25)
-            else:
-                fusion_score = norm_semantic
-            # Add reciprocal rank fusion bonus (helps with ranking diversity)
-            rank_bonus = 1.0 / (1.0 + max(norm_semantic, norm_bm25) * 10)
-            fusion_score += rank_bonus * 0.1
-            data['fusion_score'] = fusion_score
-    def cleanup(self):
-        """Cleanup search manager resources."""
-        print("🧹 Cleaning up Search Manager resources...")
-        # Close all Qdrant clients
-        for client in self.qdrant_clients.values():
-            try:
-                client.close()
-            except Exception:
-                pass
-        self.qdrant_clients.clear()
-        self.bm25_indexes.clear()
-        self.document_chunks.clear()
-        print("✅ Search Manager cleanup completed")