# rag_utils.py
import logging
from typing import Any, Dict, List, Optional

import numpy as np
import streamlit as st
import torch
from langchain.docstore.document import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from tqdm import tqdm
# Set up logging
# Module-level logger shared by all RAGSystem methods; basicConfig runs at
# import time so log records are emitted even without host configuration.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class RAGSystem:
    """Retrieval-augmented generation helper for construction-damage analysis.

    Builds a FAISS vector store from a structured knowledge base of damage
    cases and answers queries with formatted technical / safety / repair
    sections retrieved from it.
    """

    def __init__(self):
        """Initialize the embedding model and the document splitter.

        Raises:
            Exception: any failure while loading the embedding model is
                logged and re-raised.
        """
        try:
            # BUG FIX: the device probe previously called st.cuda.is_available();
            # streamlit exposes no `cuda` attribute, so __init__ always raised
            # AttributeError. torch owns the CUDA availability check.
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2",
                model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
            )
            # Populated lazily by initialize_knowledge_base().
            self.vector_store = None
            self.text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50,
                # Prefer paragraph, then line, sentence, clause, word boundaries.
                separators=["\n\n", "\n", ". ", ", ", " ", ""]
            )
            logger.info("RAG system initialized successfully")
        except Exception as e:
            logger.error("Error initializing RAG system: %s", e)
            raise

    def _create_documents(self, knowledge_base: Dict) -> List["Document"]:
        """Flatten the knowledge base into one Document per damage case.

        Args:
            knowledge_base: mapping of damage type -> list of case dicts. Each
                case must carry 'severity', 'description', 'location',
                'required_expertise', 'repair_method' (list of str),
                'estimated_cost', 'timeframe', 'immediate_action' and
                'prevention' keys (a missing key raises KeyError).

        Returns:
            Documents whose text combines technical, repair and safety
            sections and whose metadata supports filtered retrieval.
        """
        documents = []
        try:
            for damage_type, cases in knowledge_base.items():
                for case in cases:
                    # Section headers below are load-bearing: _format_response
                    # splits retrieved text on "Technical Analysis",
                    # "Repair and Maintenance" and "Safety and Prevention".
                    technical_info = f"""
                    Technical Analysis for {damage_type}:
                    Severity Level: {case['severity']}
                    Detailed Description: {case['description']}
                    Primary Location: {case['location']}
                    Required Expertise: {case['required_expertise']}
                    """
                    repair_info = f"""
                    Repair and Maintenance Information:
                    Repair Methods: {' -> '.join(case['repair_method'])}
                    Estimated Cost Range: {case['estimated_cost']}
                    Expected Timeframe: {case['timeframe']}
                    """
                    safety_info = f"""
                    Safety and Prevention Guidelines:
                    Immediate Actions Required: {case['immediate_action']}
                    Preventive Measures: {case['prevention']}
                    Critical Considerations: Special attention needed for {damage_type} in {case['location']}
                    """
                    doc_text = f"{technical_info}\n{repair_info}\n{safety_info}"
                    # Metadata enables filtering and the similar-case lookup.
                    metadata = {
                        'damage_type': damage_type,
                        'severity': case['severity'],
                        'location': case['location'],
                        'document_type': 'construction_damage_analysis'
                    }
                    documents.append(Document(
                        page_content=doc_text,
                        metadata=metadata
                    ))
            logger.info("Created %d documents from knowledge base", len(documents))
            return documents
        except Exception as e:
            logger.error("Error creating documents: %s", e)
            raise

    def initialize_knowledge_base(self, knowledge_base: Dict):
        """Build the FAISS vector store from the construction knowledge base.

        Args:
            knowledge_base: see _create_documents for the expected schema.

        Raises:
            Exception: any failure during document creation, splitting or
                index construction is logged and re-raised.
        """
        try:
            documents = self._create_documents(knowledge_base)
            # Chunk long case documents so retrieval stays fine-grained.
            splits = self.text_splitter.split_documents(documents)
            self.vector_store = FAISS.from_documents(
                documents=splits,
                embedding=self.embeddings
            )
            logger.info("Knowledge base initialized successfully")
        except Exception as e:
            logger.error("Error initializing knowledge base: %s", e)
            raise

    def _format_response(self, docs: List["Document"], damage_type: str, confidence: float) -> Dict[str, List[str]]:
        """Group retrieved document text into structured response sections.

        Splits each document's text on the section markers written by
        _create_documents; a document missing a marker simply contributes
        nothing to that section.

        Args:
            docs: retrieved documents (only `page_content` is read).
            damage_type: label echoed into the technical-details header.
            confidence: model confidence in percent, rendered with one decimal.

        Returns:
            Dict with 'technical_details', 'safety_considerations' and
            'expert_recommendations' lists (possibly empty).
        """
        response = {
            "technical_details": [],
            "safety_considerations": [],
            "expert_recommendations": []
        }
        try:
            for doc in docs:
                content = doc.page_content
                # Technical section: text between the analysis and repair markers.
                if "Technical Analysis" in content:
                    response["technical_details"].append(
                        f"For {damage_type} (Confidence: {confidence:.1f}%):\n" +
                        content.split("Technical Analysis")[1].split("Repair")[0].strip()
                    )
                # Safety section runs to the end of the document.
                if "Safety and Prevention" in content:
                    response["safety_considerations"].append(
                        content.split("Safety and Prevention")[1].strip()
                    )
                # Repair section: text between the repair and safety markers.
                if "Repair and Maintenance" in content:
                    response["expert_recommendations"].append(
                        content.split("Repair and Maintenance")[1].split("Safety")[0].strip()
                    )
            return response
        except Exception as e:
            logger.error("Error formatting response: %s", e)
            raise

    def get_enhanced_analysis(
        self,
        damage_type: str,
        confidence: float,
        custom_query: Optional[str] = None
    ) -> Dict[str, List[str]]:
        """Retrieve and format analysis for a damage type.

        Args:
            damage_type: detected damage class to analyse.
            confidence: detection confidence in percent.
            custom_query: optional user query; when given it replaces the
                default analysis prompt.

        Returns:
            Structured sections from _format_response, or a fallback error
            payload (never raises — errors are reported in-band so the UI
            always has something to render).
        """
        try:
            if not self.vector_store:
                raise ValueError("Vector store not initialized")
            if custom_query:
                query = f"{custom_query} for {damage_type} damage"
            else:
                query = f"""
                Provide detailed analysis for {damage_type} damage with {confidence}% confidence level.
                Include technical assessment, safety considerations, and repair recommendations.
                """
            # k=3 top matches; fetch_k widens the candidate pool for diversity.
            docs = self.vector_store.similarity_search(
                query=query,
                k=3,
                fetch_k=5
            )
            return self._format_response(docs, damage_type, confidence)
        except Exception as e:
            logger.error("Error getting enhanced analysis: %s", e)
            return {
                "technical_details": [f"Error retrieving analysis: {str(e)}"],
                "safety_considerations": ["Please try again or contact support."],
                "expert_recommendations": ["System currently unavailable."]
            }

    def get_similar_cases(self, damage_type: str, confidence: float) -> List[Dict[str, Any]]:
        """Retrieve cases of *other* damage types related to the given one.

        Args:
            damage_type: damage class to find neighbours for; exact matches
                are filtered out so only genuinely different cases return.
            confidence: currently unused; kept for interface symmetry with
                get_enhanced_analysis.

        Returns:
            Up to 3 case summaries (type, severity, location, 200-char
            preview), or an empty list on any failure (best-effort lookup).
        """
        try:
            if not self.vector_store:
                raise ValueError("Vector store not initialized")
            query = f"Find similar cases of {damage_type} damage"
            docs = self.vector_store.similarity_search(query, k=3)
            similar_cases = []
            for doc in docs:
                if doc.metadata['damage_type'] != damage_type:
                    similar_cases.append({
                        'damage_type': doc.metadata['damage_type'],
                        'severity': doc.metadata['severity'],
                        'location': doc.metadata['location'],
                        'details': doc.page_content[:200] + '...'
                    })
            return similar_cases
        except Exception as e:
            logger.error("Error getting similar cases: %s", e)
            return []