quantumbit committed on
Commit
c1a4784
·
verified ·
1 Parent(s): 3fe7b63

Delete RAG

Browse files
RAG/__init__.py DELETED
@@ -1 +0,0 @@
1
- # RAG Package
 
 
RAG/advanced_rag_processor.py DELETED
@@ -1,169 +0,0 @@
1
- """
2
- Advanced RAG Processor - Modular Version
3
- Orchestrates all RAG components for document question answering.
4
- Version: 3.0 - Modular Architecture
5
- """
6
-
7
- import time
8
- from typing import Dict, Tuple
9
- from pathlib import Path
10
-
11
- # Import all modular components
12
- from RAG.rag_modules.query_expansion import QueryExpansionManager
13
- from RAG.rag_modules.embedding_manager import EmbeddingManager
14
- from RAG.rag_modules.search_manager import SearchManager
15
- from RAG.rag_modules.reranking_manager import RerankingManager
16
- from RAG.rag_modules.context_manager import ContextManager
17
- from RAG.rag_modules.answer_generator import AnswerGenerator
18
-
19
- from LLM.llm_handler import llm_handler
20
- from config.config import OUTPUT_DIR, TOP_K
21
-
22
-
class AdvancedRAGProcessor:
    """
    Advanced RAG processor with modular architecture for better maintainability.
    Orchestrates query expansion, hybrid search, reranking, and answer generation.
    """

    def __init__(self):
        """Initialize the advanced RAG processor with all modules."""
        self.base_db_path = Path(OUTPUT_DIR)

        print("🚀 Initializing Advanced RAG Processor (Modular)...")

        # Core components. The search manager shares the embedding manager
        # instance that is created first.
        self.embedding_manager = EmbeddingManager()
        self.query_expansion_manager = QueryExpansionManager()
        self.search_manager = SearchManager(self.embedding_manager)
        self.reranking_manager = RerankingManager()
        self.context_manager = ContextManager()
        self.answer_generator = AnswerGenerator()

        # Keep reference to LLM handler for info
        self.llm_handler = llm_handler

        print(f"✅ Advanced RAG Processor initialized with {self.llm_handler.provider.upper()} LLM")
        print("📦 All modules loaded successfully:")
        print(" 🔄 Query Expansion Manager")
        print(" 🧠 Embedding Manager")
        print(" 🔍 Search Manager (Hybrid)")
        print(" 🎯 Reranking Manager")
        print(" 📝 Context Manager")
        print(" 💬 Answer Generator")

    @staticmethod
    def _record_stage(timings, stage, started, logger, request_id) -> float:
        """Store the elapsed time of a pipeline stage and forward it to the logger.

        Args:
            timings: Dict that collects per-stage durations (mutated in place).
            stage: Key under which the duration is stored and logged.
            started: ``time.perf_counter()`` reading taken when the stage began.
            logger: Optional object exposing ``log_pipeline_stage``.
            request_id: Optional request ID; logging is skipped without it.

        Returns:
            The elapsed time in seconds.
        """
        elapsed = time.perf_counter() - started
        timings[stage] = elapsed
        if logger and request_id:
            logger.log_pipeline_stage(request_id, stage, elapsed)
        return elapsed

    async def answer_question(self, question: str, doc_id: str, logger=None, request_id: str = None) -> Tuple[str, Dict[str, float]]:
        """
        Answer a question using advanced RAG techniques with detailed timing.

        Args:
            question: The question to answer
            doc_id: Document ID to search in
            logger: Optional logger for tracking
            request_id: Optional request ID for logging

        Returns:
            Tuple of (answer, timing_breakdown). Failures are reported through
            the answer text instead of being raised; in that case the timing
            dict contains an ``error_time`` entry.
        """
        timings = {}
        # perf_counter is monotonic, so durations cannot be skewed by
        # wall-clock adjustments the way time.time() differences can.
        overall_start = time.perf_counter()

        try:
            # Check if collection exists; any failure here is treated as
            # "document not processed" rather than as a pipeline error.
            collection_name = f"{doc_id}_collection"
            try:
                client = self.search_manager.get_qdrant_client(doc_id)
                client.get_collection(collection_name)
            except Exception:
                return "I don't have information about this document. Please ensure the document has been processed.", timings

            print(f"🚀 Advanced RAG processing for: {question[:100]}...")

            # Step 1: Query Expansion
            step_start = time.perf_counter()
            expanded_queries = await self.query_expansion_manager.expand_query(question)
            expansion_time = self._record_stage(timings, 'query_expansion', step_start, logger, request_id)

            # Step 2: Hybrid Search with Fusion
            step_start = time.perf_counter()
            search_results = await self.search_manager.hybrid_search(expanded_queries, doc_id, TOP_K)
            search_time = self._record_stage(timings, 'hybrid_search', step_start, logger, request_id)

            if not search_results:
                return "I couldn't find relevant information to answer your question.", timings

            # Step 3: Reranking
            step_start = time.perf_counter()
            reranked_results = await self.reranking_manager.rerank_results(question, search_results)
            rerank_time = self._record_stage(timings, 'reranking', step_start, logger, request_id)

            # Step 4: Multi-perspective Context Creation
            step_start = time.perf_counter()
            context = self.context_manager.create_enhanced_context(question, reranked_results)
            context_time = self._record_stage(timings, 'context_creation', step_start, logger, request_id)

            # Step 5: Enhanced Answer Generation
            step_start = time.perf_counter()
            answer = await self.answer_generator.generate_enhanced_answer(question, context, expanded_queries)
            generation_time = self._record_stage(timings, 'llm_generation', step_start, logger, request_id)

            # Calculate total time
            total_time = time.perf_counter() - overall_start
            timings['total_pipeline'] = total_time

            print(f"\n✅ Advanced RAG processing completed in {total_time:.4f}s")
            print(f" 🔍 Query expansion: {expansion_time:.4f}s")
            print(f" 🔎 Hybrid search: {search_time:.4f}s")
            print(f" 🎯 Reranking: {rerank_time:.4f}s")
            print(f" 📝 Context creation: {context_time:.4f}s")
            print(f" 💬 LLM generation: {generation_time:.4f}s")

            return answer, timings

        except Exception as e:
            # Top-level boundary: report the failure to the caller as text and
            # keep whatever stage timings were collected so far.
            timings['error_time'] = time.perf_counter() - overall_start
            print(f"❌ Error in advanced RAG processing: {str(e)}")
            return f"I encountered an error while processing your question: {str(e)}", timings

    def cleanup(self):
        """Cleanup all manager resources."""
        print("🧹 Cleaning up Advanced RAG processor resources...")

        # Only the search manager is cleaned up explicitly here.
        self.search_manager.cleanup()

        print("✅ Advanced RAG cleanup completed")

    def get_system_info(self) -> Dict:
        """Get information about the RAG system."""
        return {
            "version": "3.0 - Modular",
            "llm_provider": self.llm_handler.provider,
            "llm_model": self.llm_handler.model_name,
            "modules": [
                "QueryExpansionManager",
                "EmbeddingManager",
                "SearchManager",
                "RerankingManager",
                "ContextManager",
                "AnswerGenerator"
            ],
            "base_db_path": str(self.base_db_path)
        }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
RAG/rag_embeddings/.gitkeep DELETED
File without changes
RAG/rag_modules/__init__.py DELETED
@@ -1 +0,0 @@
1
- # RAG Modules Package
 
 
RAG/rag_modules/answer_generator.py DELETED
@@ -1,97 +0,0 @@
1
- """
2
- Answer Generation Module for Advanced RAG
3
- Handles LLM-based answer generation with enhanced prompting.
4
- """
5
-
6
- from typing import List
7
- from LLM.llm_handler import llm_handler
8
- from config.config import TEMPERATURE, MAX_TOKENS
9
-
10
-
class AnswerGenerator:
    """Manages answer generation using LLM."""

    # The system prompt never changes between calls, so it is defined once
    # here instead of being rebuilt on every request.
    _SYSTEM_PROMPT = """

You are an expert AI assistant specializing in document analysis and policy-related question answering. You have access to relevant document excerpts and must respond only based on this information. You are designed specifically for analyzing official documents and answering user queries related to them.

STRICT RULES AND RESPONSE CONDITIONS:

Irrelevant/Out-of-Scope Queries (e.g., programming help, general product info, coding tasks):
Respond EXACTLY:

"I cannot help with that. I am designed only to answer queries related to the provided document excerpts."

Illegal or Prohibited Requests (e.g., forgery, fraud, bypassing regulations):
Respond CLEARLY that the request is illegal. Example format:

"This request is illegal and cannot be supported. According to the applicable regulations in the document, [explain why it's illegal if mentioned]. Engaging in such activity may lead to legal consequences."
If illegality is not explicitly in the documents, use:
"This request involves illegal activity and is against policy. I cannot assist with this."

Nonexistent Concepts, Schemes, or Entities:
Respond by stating the concept does not exist and offer clarification by pointing to related valid information. Example:

"There is no mention of such a scheme in the document. However, the following related schemes are described: [summarize relevant ones]."

Valid Topics with Missing or Incomplete Information:
Respond that the exact answer is unavailable, then provide all related details and recommend official contact. Example:

"The exact information is not available in the provided document. However, here is what is relevant: [details]. For further clarification, you may contact: [official contact details if included in the document]."

Valid Questions Answerable from Document:
Provide a concise and accurate answer with clear reference to the document content. Also include any related notes that might aid understanding. Example:

"[Answer]. According to the policy document, [quote/summary from actual document content]."

GENERAL ANSWERING RULES:

Use ONLY the provided document excerpts. Never use external knowledge.

Be concise: 5-6 sentences per answer, with all the details available for that particular query.

Start directly with the answer. Do not restate or rephrase the question.

Never speculate or elaborate beyond what is explicitly stated.

When referencing information, mention "according to the document" or "as stated in the policy" rather than using internal labels like "Query X Doc Y".

Do not reference internal organizational labels like [Query 1 Doc 2] or [Relevance: X.XX] - these are for processing only.

Focus on the actual document content and policy information when providing answers.

The user may phrase questions in various ways — always infer the intent, apply the rules above, and respond accordingly.

"""

    def __init__(self):
        """Initialize the answer generator."""
        self.llm_handler = llm_handler
        print("✅ Answer Generator initialized")

    async def generate_enhanced_answer(self, original_question: str, context: str, expanded_queries: List[str]) -> str:
        """Generate an answer for the original question from the retrieved context.

        Args:
            original_question: The user's question; only this text is sent to
                the LLM as the question.
            context: Document excerpts to ground the answer in.
            expanded_queries: Accepted for interface compatibility; not used
                when building the prompt.

        Returns:
            The stripped LLM answer, or a fixed error sentence if generation
            fails or yields no text. Exceptions are not propagated.
        """
        user_prompt = (
            f"Question: {original_question}\n"
            "\n"
            "Document Excerpts:\n"
            f"{context}\n"
            "\n"
            "Provide a comprehensive answer based on the document excerpts above:"
        )

        try:
            answer = await self.llm_handler.generate_text(
                system_prompt=self._SYSTEM_PROMPT,
                user_prompt=user_prompt,
                temperature=TEMPERATURE,
                max_tokens=MAX_TOKENS
            )

            # Fail with a meaningful message instead of an incidental
            # AttributeError from calling .strip() on None.
            if answer is None:
                raise ValueError("LLM returned no text")

            return answer.strip()

        except Exception as e:
            print(f"❌ Error generating enhanced response with {self.llm_handler.provider.upper()}: {str(e)}")
            return "I encountered an error while generating the response."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
RAG/rag_modules/context_manager.py DELETED
@@ -1,81 +0,0 @@
1
- """
2
- Context Management Module for Advanced RAG
3
- Handles context creation and management for LLM generation.
4
- """
5
-
6
- from typing import List, Dict
7
- from collections import defaultdict
8
- from config.config import MAX_CONTEXT_LENGTH
9
-
10
-
class ContextManager:
    """Manages context creation for LLM generation."""

    def __init__(self):
        """Initialize the context manager."""
        print("✅ Context Manager initialized")

    def create_enhanced_context(self, question: str, results: List[Dict], max_length: int = MAX_CONTEXT_LENGTH) -> str:
        """Create enhanced context ensuring each query contributes equally.

        Each result is attributed to the expanded query that scored it highest
        (query 0 when no attribution is present). The results are then split
        into per-query quotas and emitted query by query until ``max_length``
        characters are reached.

        Args:
            question: The original question (not used when building the text).
            results: Search results; each must carry ``payload`` and may carry
                ``contributing_queries``, ``rerank_score``, ``final_score``
                and ``score``.
            max_length: Character budget for the labelled chunks (the newline
                separators added when joining are not counted).

        Returns:
            The labelled chunks joined by newlines, or "" when there are no
            results.
        """
        # Group results by expanded query index
        query_to_chunks = defaultdict(list)
        for i, result in enumerate(results):
            contributing = result.get('contributing_queries')
            if contributing:
                # Use the highest scoring contributing query
                best_contrib = max(
                    contributing,
                    key=lambda cq: cq.get('semantic_score', cq.get('bm25_score', 0)),
                )
                query_idx = best_contrib['query_idx']
            else:
                query_idx = 0  # fallback to first query
            query_to_chunks[query_idx].append((i, result))

        # Sort chunks within each query by their relevance scores
        for chunks in query_to_chunks.values():
            chunks.sort(
                key=lambda x: x[1].get('rerank_score', x[1].get('final_score', x[1].get('score', 0))),
                reverse=True,
            )

        num_queries = len(query_to_chunks)
        if num_queries == 0:
            return ""

        context_parts = []
        current_length = 0

        # Even split of the result count across queries; the remainder is
        # handed out one extra slot at a time to the first queries in order.
        chunks_per_query, extra_chunks = divmod(len(results), num_queries)

        print(f"📊 Context Creation: {num_queries} queries, {chunks_per_query} chunks per query (+{extra_chunks} extra)")

        # The extra slots are assigned by the query's rank among the queries
        # that actually have chunks, not by its raw index value: with sparse
        # indices (e.g. only queries 1 and 2 present) comparing the raw index
        # against the remainder would silently drop those slots.
        for position, q_idx in enumerate(sorted(query_to_chunks)):
            query_chunk_limit = chunks_per_query + (1 if position < extra_chunks else 0)
            query_chunks_added = 0

            print(f" Query {q_idx+1}: Adding up to {query_chunk_limit} chunks")

            for _, result in query_to_chunks[q_idx]:
                if query_chunks_added >= query_chunk_limit:
                    break

                text = result['payload'].get('text', '')
                relevance_info = ""
                if 'rerank_score' in result:
                    relevance_info = f" [Relevance: {result['rerank_score']:.2f}]"
                elif 'final_score' in result:
                    relevance_info = f" [Score: {result['final_score']:.2f}]"
                doc_text = f"[Query {q_idx+1} Doc {len(context_parts)+1}]{relevance_info}\n{text}\n"

                if current_length + len(doc_text) > max_length:
                    # Stops this query only; later queries may still add
                    # chunks that fit in the remaining budget.
                    print(f" ⚠️ Context length limit reached at {current_length} chars")
                    break

                context_parts.append(doc_text)
                current_length += len(doc_text)
                query_chunks_added += 1

            print(f" Query {q_idx+1}: Added {query_chunks_added} chunks")

        print(f"📝 Final context: {len(context_parts)} chunks, {current_length} chars")
        return "\n".join(context_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
RAG/rag_modules/embedding_manager.py DELETED
@@ -1,42 +0,0 @@
1
- """
2
- Embedding Management Module for Advanced RAG
3
- Handles text encoding and embedding operations.
4
- """
5
-
6
- import asyncio
7
- from typing import List
8
- from sentence_transformers import SentenceTransformer
9
- from config.config import EMBEDDING_MODEL
10
-
11
-
class EmbeddingManager:
    """Manages text embeddings for RAG operations."""

    def __init__(self):
        """Initialize the embedding manager."""
        self.embedding_model = None
        self._init_embedding_model()

    def _init_embedding_model(self):
        """Initialize the embedding model."""
        print(f"🔄 Loading embedding model: {EMBEDDING_MODEL}")
        self.embedding_model = SentenceTransformer(EMBEDDING_MODEL)
        print("✅ Embedding model loaded successfully")

    async def _encode_in_executor(self, texts: List[str]) -> List[List[float]]:
        """Run the blocking ``encode`` call in the default executor.

        Returns one list of float32-rounded Python floats per input text.
        """
        def encode_sync():
            embeddings = self.embedding_model.encode(texts, normalize_embeddings=True)
            return [emb.astype("float32").tolist() for emb in embeddings]

        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, encode_sync)

    async def encode_query(self, query: str) -> List[float]:
        """Encode a query into embeddings."""
        vectors = await self._encode_in_executor([query])
        return vectors[0]

    async def encode_texts(self, texts: List[str]) -> List[List[float]]:
        """Encode multiple texts into embeddings."""
        return await self._encode_in_executor(texts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
RAG/rag_modules/query_expansion.py DELETED
@@ -1,128 +0,0 @@
1
- """
2
- Query Expansion Module for Advanced RAG
3
- Handles breaking down complex queries into focused sub-queries for better information retrieval.
4
- """
5
-
6
- import re
7
- import time
8
- from typing import List
9
- from LLM.llm_handler import llm_handler
10
- from config.config import ENABLE_QUERY_EXPANSION, QUERY_EXPANSION_COUNT
11
-
12
-
class QueryExpansionManager:
    """Manages query expansion for better information retrieval."""

    # Matches list decoration at the start of a line ("1. ", "2) ", "- ",
    # "* ", "• "). A number only counts as a marker when followed by "." or
    # ")" and not by another digit, so sub-queries that genuinely start with
    # a number ("24-hour ...", "3.5 lakh ...") are left intact.
    _LIST_MARKER = re.compile(r'^\s*(?:\d+[.)](?!\d)\s*|[-*•]+\s*)+')

    # Component name -> (trigger keywords, label recorded when any keyword
    # occurs in the query). Order defines the key order of the result.
    _COMPONENT_RULES = {
        'processes': (
            ['process', 'procedure', 'steps', 'how to', 'submit', 'apply', 'claim', 'update', 'change', 'enroll'],
            'process identification',
        ),
        'documents': (
            ['documents', 'forms', 'papers', 'certificate', 'proof', 'evidence', 'requirements'],
            'document requirements',
        ),
        'contacts': (
            ['email', 'phone', 'contact', 'grievance', 'customer service', 'support', 'helpline'],
            'contact information',
        ),
        'eligibility': (
            ['eligibility', 'criteria', 'qualify', 'eligible', 'conditions', 'requirements'],
            'eligibility criteria',
        ),
        'timelines': (
            ['timeline', 'period', 'duration', 'time', 'days', 'months', 'waiting', 'grace'],
            'timeline information',
        ),
        'benefits': (
            ['benefits', 'coverage', 'limits', 'amount', 'reimbursement', 'claim amount'],
            'benefit details',
        ),
    }

    def __init__(self):
        """Initialize the query expansion manager."""
        self.llm_handler = llm_handler
        print("✅ Query Expansion Manager initialized")

    @classmethod
    def _parse_sub_queries(cls, response: str, limit: int) -> List[str]:
        """Extract at most ``limit`` cleaned sub-queries from an LLM response.

        One candidate per line; list markers are removed and candidates of
        10 characters or fewer are discarded as noise.
        """
        parsed = []
        for line in response.strip().split('\n'):
            if len(parsed) >= limit:  # Stop when we have enough
                break
            candidate = cls._LIST_MARKER.sub('', line.strip()).strip()
            if len(candidate) > 10:
                parsed.append(candidate)
        return parsed

    async def expand_query(self, original_query: str) -> List[str]:
        """Break complex queries into focused parts for better information retrieval.

        Args:
            original_query: The user's question.

        Returns:
            ``[original_query]`` when expansion is disabled or fails;
            otherwise exactly ``QUERY_EXPANSION_COUNT`` queries: the parsed
            sub-queries, or that many copies of the original question when
            the LLM did not produce enough usable lines.
        """
        if not ENABLE_QUERY_EXPANSION:
            return [original_query]

        try:
            expansion_prompt = f"""Analyze this question and break it down into exactly {QUERY_EXPANSION_COUNT} specific, focused sub-questions that can be searched independently in a document. Each sub-question should target a distinct piece of information or process.

For complex questions with multiple parts, identify:
1. Different processes or procedures mentioned
2. Specific information requests (emails, contact details, forms, etc.)
3. Different entities or subjects involved
4. Sequential steps that might be documented separately

Original question: {original_query}

Break this into exactly {QUERY_EXPANSION_COUNT} focused search queries that target different aspects:

Examples of good breakdown:
- "What is the dental claim submission process?"
- "How to update surname/name in policy records?"
- "What are the company contact details and grievance email?"

Provide only {QUERY_EXPANSION_COUNT} focused sub-questions, one per line, without numbering or additional formatting:"""

            response = await self.llm_handler.generate_simple(
                expansion_prompt,
                temperature=0.3,  # Lower temperature for more focused breakdown
                max_tokens=300  # More tokens for detailed breakdown
            )

            # The original question is deliberately not part of this list.
            expanded_queries = []
            if response:
                expanded_queries = self._parse_sub_queries(response, QUERY_EXPANSION_COUNT)

            # If we don't have enough sub-queries, fall back to using the original
            if len(expanded_queries) < QUERY_EXPANSION_COUNT:
                expanded_queries = [original_query] * QUERY_EXPANSION_COUNT

            # Ensure we have exactly QUERY_EXPANSION_COUNT queries
            final_queries = expanded_queries[:QUERY_EXPANSION_COUNT]

            print(f"🔄 Query broken down from 1 complex question to {len(final_queries)} focused sub-queries using {self.llm_handler.provider.upper()}")
            print(f"📌 Original query will be used for final LLM generation only")
            for i, q in enumerate(final_queries):
                print(f" Sub-query {i+1}: {q[:80]}...")

            return final_queries

        except Exception as e:
            # Best effort: retrieval still proceeds with the unexpanded query.
            print(f"⚠️ Query expansion failed: {e}")
            return [original_query]

    def _identify_query_components(self, query: str) -> dict:
        """Identify different components in a complex query for better breakdown.

        Matching is a case-insensitive substring test of each keyword against
        the query.

        Returns:
            Dict with the keys ``processes``, ``documents``, ``contacts``,
            ``eligibility``, ``timelines`` and ``benefits``; each value is a
            list holding one descriptive label when the component was
            detected, otherwise an empty list.
        """
        query_lower = query.lower()
        return {
            name: [label] if any(keyword in query_lower for keyword in keywords) else []
            for name, (keywords, label) in self._COMPONENT_RULES.items()
        }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
RAG/rag_modules/reranking_manager.py DELETED
@@ -1,63 +0,0 @@
1
- """
2
- Reranking Module for Advanced RAG
3
- Handles result reranking using cross-encoder models.
4
- """
5
-
6
- from typing import List, Dict
7
- from sentence_transformers import CrossEncoder
8
- from config.config import ENABLE_RERANKING, RERANKER_MODEL, RERANK_TOP_K
9
-
10
-
class RerankingManager:
    """Manages result reranking using cross-encoder models."""

    def __init__(self):
        """Initialize the reranking manager."""
        self.reranker_model = None
        if ENABLE_RERANKING:
            self._init_reranker_model()
        print("✅ Reranking Manager initialized")

    def _init_reranker_model(self):
        """Initialize the reranker model."""
        print(f"🔄 Loading reranker model: {RERANKER_MODEL}")
        self.reranker_model = CrossEncoder(RERANKER_MODEL)
        print("✅ Reranker model loaded successfully")

    async def rerank_results(self, query: str, search_results: List[Dict]) -> List[Dict]:
        """Rerank search results using cross-encoder.

        Args:
            query: The question the results are scored against.
            search_results: Result dicts; each must carry ``payload`` and may
                carry ``score``. On success every dict is annotated in place
                with ``rerank_score`` and ``final_score``.

        Returns:
            The input list unchanged when reranking is disabled, no model is
            loaded, or there is at most one result. Otherwise the top
            ``RERANK_TOP_K`` results by ``final_score``; if scoring fails,
            the first ``RERANK_TOP_K`` results in their original order.
        """
        # Local import: the module does not import asyncio at file level.
        import asyncio

        if not ENABLE_RERANKING or not self.reranker_model or len(search_results) <= 1:
            return search_results

        try:
            # Prepare pairs for reranking; only the first 512 characters of
            # each chunk are scored.
            query_doc_pairs = [
                [query, result['payload'].get('text', '')[:512]]
                for result in search_results
            ]

            # Model inference is blocking work, so it is run in the default
            # executor to keep the event loop responsive while it scores.
            loop = asyncio.get_running_loop()
            rerank_scores = await loop.run_in_executor(
                None, self.reranker_model.predict, query_doc_pairs
            )

            # Combine with original scores
            for result, raw_score in zip(search_results, rerank_scores):
                original_score = result.get('score', 0)
                rerank_score = float(raw_score)

                # Weighted combination of original and rerank scores
                result['rerank_score'] = rerank_score
                result['final_score'] = 0.3 * original_score + 0.7 * rerank_score

            # Sort by final score
            reranked_results = sorted(
                search_results,
                key=lambda x: x['final_score'],
                reverse=True
            )

            print(f"🎯 Reranked {len(search_results)} results")
            return reranked_results[:RERANK_TOP_K]

        except Exception as e:
            # Best effort: fall back to the retrieval order.
            print(f"⚠️ Reranking failed: {e}")
            return search_results[:RERANK_TOP_K]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
RAG/rag_modules/search_manager.py DELETED
@@ -1,334 +0,0 @@
1
- """
2
- Search Module for Advanced RAG
3
- Handles hybrid search combining BM25 and semantic search with score fusion.
4
- """
5
-
6
- import re
7
- import time
8
- import numpy as np
9
- from typing import List, Dict, Any
10
- from pathlib import Path
11
- from rank_bm25 import BM25Okapi
12
- from qdrant_client import QdrantClient
13
-
14
- from config.config import (
15
- OUTPUT_DIR, TOP_K, SCORE_THRESHOLD, ENABLE_HYBRID_SEARCH,
16
- BM25_WEIGHT, SEMANTIC_WEIGHT, USE_TOTAL_BUDGET_APPROACH
17
- )
18
-
19
-
20
- class SearchManager:
21
- """Manages hybrid search operations combining BM25 and semantic search."""
22
-
23
- def __init__(self, embedding_manager):
24
- """Initialize the search manager."""
25
- self.embedding_manager = embedding_manager
26
- self.base_db_path = Path(OUTPUT_DIR)
27
- self.qdrant_clients = {}
28
- self.bm25_indexes = {} # Cache BM25 indexes per document
29
- self.document_chunks = {} # Cache chunks for BM25
30
- print("✅ Search Manager initialized")
31
-
32
- def get_qdrant_client(self, doc_id: str) -> QdrantClient:
33
- """Get or create Qdrant client for a specific document."""
34
- if doc_id not in self.qdrant_clients:
35
- db_path = self.base_db_path / f"{doc_id}_collection.db"
36
- if not db_path.exists():
37
- raise FileNotFoundError(f"Database not found for document {doc_id}")
38
- self.qdrant_clients[doc_id] = QdrantClient(path=str(db_path))
39
- return self.qdrant_clients[doc_id]
40
-
41
- def _load_bm25_index(self, doc_id: str):
42
- """Load or create BM25 index for a document."""
43
- if doc_id not in self.bm25_indexes:
44
- print(f"🔄 Loading BM25 index for {doc_id}")
45
-
46
- # Get all chunks from Qdrant
47
- client = self.get_qdrant_client(doc_id)
48
- collection_name = f"{doc_id}_collection"
49
-
50
- try:
51
- # Get all points from collection
52
- result = client.scroll(
53
- collection_name=collection_name,
54
- limit=10000, # Adjust based on your chunk count
55
- with_payload=True,
56
- with_vectors=False
57
- )
58
-
59
- chunks = []
60
- chunk_ids = []
61
-
62
- for point in result[0]:
63
- chunk_text = point.payload.get('text', '')
64
- chunks.append(chunk_text)
65
- chunk_ids.append(point.id)
66
-
67
- # Tokenize chunks for BM25
68
- tokenized_chunks = [self._tokenize_text(chunk) for chunk in chunks]
69
-
70
- # Create BM25 index
71
- self.bm25_indexes[doc_id] = BM25Okapi(tokenized_chunks)
72
- self.document_chunks[doc_id] = {
73
- 'chunks': chunks,
74
- 'chunk_ids': chunk_ids,
75
- 'tokenized_chunks': tokenized_chunks
76
- }
77
-
78
- print(f"✅ BM25 index loaded for {doc_id} with {len(chunks)} chunks")
79
-
80
- except Exception as e:
81
- print(f"❌ Error loading BM25 index for {doc_id}: {e}")
82
- # Fallback: empty index
83
- self.bm25_indexes[doc_id] = BM25Okapi([[]])
84
- self.document_chunks[doc_id] = {'chunks': [], 'chunk_ids': [], 'tokenized_chunks': []}
85
-
86
- def _tokenize_text(self, text: str) -> List[str]:
87
- """Simple tokenization for BM25."""
88
- # Remove special characters and convert to lowercase
89
- text = re.sub(r'[^\w\s]', ' ', text.lower())
90
- # Split and filter empty tokens
91
- tokens = [token for token in text.split() if len(token) > 2]
92
- return tokens
93
-
94
- async def hybrid_search(self, queries: List[str], doc_id: str, top_k: int = TOP_K) -> List[Dict]:
95
- """
96
- Perform hybrid search combining BM25 and semantic search.
97
- Optimized for focused sub-queries from query breakdown.
98
- Uses total budget approach to distribute retrieval across queries.
99
- """
100
- collection_name = f"{doc_id}_collection"
101
- client = self.get_qdrant_client(doc_id)
102
-
103
- # Ensure BM25 index is loaded
104
- if doc_id not in self.bm25_indexes:
105
- self._load_bm25_index(doc_id)
106
-
107
- # Calculate per-query budget based on approach
108
- if USE_TOTAL_BUDGET_APPROACH and len(queries) > 1:
109
- per_query_budget = max(1, top_k // len(queries))
110
- extra_budget = top_k % len(queries) # Distribute remaining budget
111
- print(f"🎯 Total Budget Approach: Distributing {top_k} candidates across {len(queries)} queries")
112
- print(f" 📊 Base budget per query: {per_query_budget}")
113
- if extra_budget > 0:
114
- print(f" ➕ Extra budget for first {extra_budget} queries: +1 each")
115
- else:
116
- per_query_budget = top_k
117
- extra_budget = 0
118
- print(f"🔍 Per-Query Approach: Each query gets {per_query_budget} candidates")
119
-
120
- all_candidates = {} # point_id -> {'score': float, 'payload': dict, 'source': str}
121
- query_performance = {} # Track performance of each sub-query
122
-
123
- print(f"🔍 Running hybrid search with {len(queries)} focused queries...")
124
-
125
- for query_idx, query in enumerate(queries):
126
- query_candidates = 0
127
- query_start = time.time()
128
-
129
- # Calculate this query's budget
130
- if USE_TOTAL_BUDGET_APPROACH and len(queries) > 1:
131
- query_budget = per_query_budget + (1 if query_idx < extra_budget else 0)
132
- search_limit = query_budget * 2 # Get extra for better selection
133
- else:
134
- query_budget = per_query_budget
135
- search_limit = query_budget * 2
136
-
137
- print(f" Q{query_idx+1} Budget: {query_budget} candidates (searching {search_limit})")
138
-
139
- # 1. Semantic Search
140
- if ENABLE_HYBRID_SEARCH or not ENABLE_HYBRID_SEARCH: # Always do semantic
141
- try:
142
- query_vector = await self.embedding_manager.encode_query(query)
143
- semantic_results = client.search(
144
- collection_name=collection_name,
145
- query_vector=query_vector,
146
- limit=search_limit, # Use query-specific limit
147
- score_threshold=SCORE_THRESHOLD
148
- )
149
-
150
- # Process semantic results with budget limit
151
- semantic_count = 0
152
- for result in semantic_results:
153
- if USE_TOTAL_BUDGET_APPROACH and semantic_count >= query_budget:
154
- break # Respect budget limit
155
-
156
- point_id = str(result.id)
157
- semantic_score = float(result.score)
158
-
159
- if point_id not in all_candidates:
160
- all_candidates[point_id] = {
161
- 'semantic_score': 0,
162
- 'bm25_score': 0,
163
- 'payload': result.payload,
164
- 'fusion_score': 0,
165
- 'contributing_queries': []
166
- }
167
-
168
- # Use max score across queries for semantic, but track which queries contributed
169
- if semantic_score > all_candidates[point_id]['semantic_score']:
170
- all_candidates[point_id]['semantic_score'] = semantic_score
171
-
172
- all_candidates[point_id]['contributing_queries'].append({
173
- 'query_idx': query_idx,
174
- 'query_text': query[:50] + '...' if len(query) > 50 else query,
175
- 'semantic_score': semantic_score,
176
- 'type': 'semantic'
177
- })
178
- query_candidates += 1
179
- semantic_count += 1
180
-
181
- except Exception as e:
182
- print(f"⚠️ Semantic search failed for query '{query[:50]}...': {e}")
183
-
184
- # 2. BM25 Search (if enabled)
185
- if ENABLE_HYBRID_SEARCH and doc_id in self.bm25_indexes:
186
- try:
187
- tokenized_query = self._tokenize_text(query)
188
- bm25_scores = self.bm25_indexes[doc_id].get_scores(tokenized_query)
189
-
190
- # Get top BM25 results with budget consideration
191
- chunk_data = self.document_chunks[doc_id]
192
- bm25_top_indices = np.argsort(bm25_scores)[::-1][:search_limit]
193
-
194
- # Process BM25 results with budget limit
195
- bm25_count = 0
196
- for idx in bm25_top_indices:
197
- if USE_TOTAL_BUDGET_APPROACH and bm25_count >= query_budget:
198
- break # Respect budget limit
199
-
200
- if idx < len(chunk_data['chunk_ids']) and bm25_scores[idx] > 0:
201
- point_id = str(chunk_data['chunk_ids'][idx])
202
- bm25_score = float(bm25_scores[idx])
203
-
204
- if point_id not in all_candidates:
205
- all_candidates[point_id] = {
206
- 'semantic_score': 0,
207
- 'bm25_score': 0,
208
- 'payload': {'text': chunk_data['chunks'][idx]},
209
- 'fusion_score': 0,
210
- 'contributing_queries': []
211
- }
212
-
213
- # Use max score across queries for BM25, but track which queries contributed
214
- if bm25_score > all_candidates[point_id]['bm25_score']:
215
- all_candidates[point_id]['bm25_score'] = bm25_score
216
-
217
- all_candidates[point_id]['contributing_queries'].append({
218
- 'query_idx': query_idx,
219
- 'query_text': query[:50] + '...' if len(query) > 50 else query,
220
- 'bm25_score': bm25_score,
221
- 'type': 'bm25'
222
- })
223
- query_candidates += 1
224
- bm25_count += 1
225
-
226
- except Exception as e:
227
- print(f"⚠️ BM25 search failed for query '{query[:50]}...': {e}")
228
-
229
- # Track query performance with budget info
230
- query_time = time.time() - query_start
231
- query_performance[query_idx] = {
232
- 'query': query[:80] + '...' if len(query) > 80 else query,
233
- 'candidates_found': query_candidates,
234
- 'budget_allocated': query_budget if USE_TOTAL_BUDGET_APPROACH else 'unlimited',
235
- 'time': query_time
236
- }
237
-
238
- # 3. Score Fusion (Reciprocal Rank Fusion + Weighted Combination)
239
- self._apply_score_fusion(all_candidates)
240
-
241
- # 4. Sort by fusion score and return top results
242
- sorted_candidates = sorted(
243
- all_candidates.items(),
244
- key=lambda x: x[1]['fusion_score'],
245
- reverse=True
246
- )
247
-
248
- # Convert to result format with enhanced metadata
249
- hybrid_results = []
250
- for point_id, data in sorted_candidates[:top_k]:
251
- hybrid_results.append({
252
- 'id': point_id,
253
- 'score': data['fusion_score'],
254
- 'payload': data['payload'],
255
- 'semantic_score': data['semantic_score'],
256
- 'bm25_score': data['bm25_score'],
257
- 'contributing_queries': data['contributing_queries']
258
- })
259
-
260
- # Log performance summary
261
- approach_name = "Total Budget" if USE_TOTAL_BUDGET_APPROACH else "Per-Query"
262
- print(f"🔍 Hybrid search completed ({approach_name} Approach):")
263
- print(f" 📊 {len(all_candidates)} total candidates from {len(queries)} focused queries")
264
- print(f" 🎯 Top {len(hybrid_results)} results selected")
265
-
266
- # Log per-query performance with budget info
267
- total_budget_used = 0
268
- for idx, perf in query_performance.items():
269
- budget_info = f" (budget: {perf['budget_allocated']})" if USE_TOTAL_BUDGET_APPROACH else ""
270
- print(f" Q{idx+1}: {perf['candidates_found']} candidates{budget_info} in {perf['time']:.3f}s")
271
- print(f" Query: {perf['query']}")
272
- if USE_TOTAL_BUDGET_APPROACH and isinstance(perf['budget_allocated'], int):
273
- total_budget_used += perf['candidates_found']
274
-
275
- if USE_TOTAL_BUDGET_APPROACH:
276
- print(f" 💰 Total budget efficiency: {total_budget_used}/{top_k} candidates used")
277
-
278
- return hybrid_results
279
-
280
- def _apply_score_fusion(self, candidates: Dict):
281
- """Apply advanced score fusion techniques."""
282
- if not candidates:
283
- return
284
-
285
- # Normalize scores
286
- semantic_scores = [data['semantic_score'] for data in candidates.values() if data['semantic_score'] > 0]
287
- bm25_scores = [data['bm25_score'] for data in candidates.values() if data['bm25_score'] > 0]
288
-
289
- # Min-Max normalization
290
- if semantic_scores:
291
- sem_min, sem_max = min(semantic_scores), max(semantic_scores)
292
- sem_range = sem_max - sem_min if sem_max > sem_min else 1
293
- else:
294
- sem_min, sem_range = 0, 1
295
-
296
- if bm25_scores:
297
- bm25_min, bm25_max = min(bm25_scores), max(bm25_scores)
298
- bm25_range = bm25_max - bm25_min if bm25_max > bm25_min else 1
299
- else:
300
- bm25_min, bm25_range = 0, 1
301
-
302
- # Calculate fusion scores
303
- for point_id, data in candidates.items():
304
- # Normalize scores
305
- norm_semantic = (data['semantic_score'] - sem_min) / sem_range if data['semantic_score'] > 0 else 0
306
- norm_bm25 = (data['bm25_score'] - bm25_min) / bm25_range if data['bm25_score'] > 0 else 0
307
-
308
- # Weighted combination
309
- if ENABLE_HYBRID_SEARCH:
310
- fusion_score = (SEMANTIC_WEIGHT * norm_semantic) + (BM25_WEIGHT * norm_bm25)
311
- else:
312
- fusion_score = norm_semantic
313
-
314
- # Add reciprocal rank fusion bonus (helps with ranking diversity)
315
- rank_bonus = 1.0 / (1.0 + max(norm_semantic, norm_bm25) * 10)
316
- fusion_score += rank_bonus * 0.1
317
-
318
- data['fusion_score'] = fusion_score
319
-
320
- def cleanup(self):
321
- """Cleanup search manager resources."""
322
- print("🧹 Cleaning up Search Manager resources...")
323
-
324
- # Close all Qdrant clients
325
- for client in self.qdrant_clients.values():
326
- try:
327
- client.close()
328
- except Exception:
329
- pass
330
-
331
- self.qdrant_clients.clear()
332
- self.bm25_indexes.clear()
333
- self.document_chunks.clear()
334
- print("✅ Search Manager cleanup completed")