File size: 8,317 Bytes
1413086
593e566
 
 
1413086
dbfc49d
1413086
 
 
 
dbfc49d
1413086
 
 
dbfc49d
593e566
 
1413086
593e566
 
1413086
 
593e566
 
 
 
1413086
 
593e566
1413086
593e566
1413086
 
593e566
1413086
 
 
593e566
 
1413086
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3b1af89
1413086
 
 
 
 
 
 
 
 
 
 
593e566
1413086
 
593e566
1413086
 
 
 
593e566
1413086
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593e566
1413086
 
 
 
 
 
 
 
 
 
593e566
3b1af89
1413086
3b1af89
1413086
 
 
593e566
1413086
 
 
 
 
593e566
1413086
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593e566
1413086
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
593e566
1413086
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# rag_utils.py
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
import logging
from typing import List, Dict, Any
import numpy as np
from tqdm import tqdm
import streamlit as st

# Configure root logging at INFO level when this module is imported, and create
# the module-level logger that RAGSystem uses for its status and error messages.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class RAGSystem:
    """Retrieval helper over a construction-damage knowledge base.

    The knowledge base (a mapping of damage type -> list of case dicts) is
    rendered into text documents, split into chunks, embedded with a
    HuggingFace sentence-transformer model and indexed in a FAISS vector
    store. Queries retrieve the closest chunks and slice them back into
    "technical", "repair" and "safety" sections.

    Attributes:
        embeddings: The HuggingFaceEmbeddings instance used for indexing and
            querying.
        vector_store: The FAISS store, or ``None`` until
            ``initialize_knowledge_base`` has been called.
        text_splitter: The RecursiveCharacterTextSplitter used to chunk
            documents before indexing.
    """

    # Section headings written into every document by ``_create_documents``
    # and used again by ``_format_response`` to slice retrieved chunks apart.
    # Keeping them in one place guarantees writer and parser stay in sync.
    _TECHNICAL_HEADING = "Technical Analysis"
    _REPAIR_HEADING = "Repair and Maintenance"
    _SAFETY_HEADING = "Safety and Prevention"

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-mpnet-base-v2",
        chunk_size: int = 500,
        chunk_overlap: int = 50,
    ):
        """Initialize RAG system with custom embeddings and configurations.

        Args:
            model_name: HuggingFace model identifier used for embeddings.
            chunk_size: Maximum chunk length passed to the text splitter.
            chunk_overlap: Overlap between consecutive chunks.

        Raises:
            Exception: Any error raised while building the embeddings or the
                splitter is logged and re-raised unchanged.
        """
        try:
            self.embeddings = HuggingFaceEmbeddings(
                model_name=model_name,
                model_kwargs={'device': self._detect_device()},
            )
            self.vector_store = None
            self.text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=chunk_size,
                chunk_overlap=chunk_overlap,
                separators=["\n\n", "\n", ". ", ", ", " ", ""],
            )
            logger.info("RAG system initialized successfully")
        except Exception as e:
            logger.error("Error initializing RAG system: %s", e)
            raise

    @staticmethod
    def _detect_device() -> str:
        """Return ``'cuda'`` when PyTorch reports a usable GPU, else ``'cpu'``."""
        # CUDA availability is a PyTorch query; Streamlit exposes no ``cuda``
        # attribute, so probing it there raised AttributeError on every init.
        try:
            import torch
        except ImportError:
            return 'cpu'
        return 'cuda' if torch.cuda.is_available() else 'cpu'

    @staticmethod
    def _section_after(content: str, heading: str, *stop_headings: str) -> str:
        """Return the text following ``heading``, cut at the first stop heading."""
        _, _, tail = content.partition(heading)
        for stop in stop_headings:
            tail = tail.partition(stop)[0]
        return tail.strip()

    def _create_documents(self, knowledge_base: Dict) -> List["Document"]:
        """Create documents from knowledge base with structured format.

        Args:
            knowledge_base: Mapping of damage type to an iterable of case
                dicts. Each case must provide the keys ``severity``,
                ``description``, ``location``, ``required_expertise``,
                ``repair_method`` (a string or an iterable of step strings),
                ``estimated_cost``, ``timeframe``, ``immediate_action`` and
                ``prevention``.

        Returns:
            One Document per case, with the damage type, severity and
            location copied into its metadata.

        Raises:
            Exception: Any error (for example a missing key) is logged and
                re-raised.
        """
        documents = []
        try:
            for damage_type, cases in knowledge_base.items():
                for case in cases:
                    # Sections are built from plain lines so that source-code
                    # indentation does not leak into the indexed text and eat
                    # into the splitter's chunk budget.
                    technical_info = "\n".join([
                        f"{self._TECHNICAL_HEADING} for {damage_type}:",
                        f"Severity Level: {case['severity']}",
                        f"Detailed Description: {case['description']}",
                        f"Primary Location: {case['location']}",
                        f"Required Expertise: {case['required_expertise']}",
                    ])

                    # A bare string would otherwise be joined character by
                    # character.
                    repair_method = case['repair_method']
                    if isinstance(repair_method, str):
                        repair_steps = repair_method
                    else:
                        repair_steps = ' -> '.join(repair_method)

                    repair_info = "\n".join([
                        f"{self._REPAIR_HEADING} Information:",
                        f"Repair Methods: {repair_steps}",
                        f"Estimated Cost Range: {case['estimated_cost']}",
                        f"Expected Timeframe: {case['timeframe']}",
                    ])

                    safety_info = "\n".join([
                        f"{self._SAFETY_HEADING} Guidelines:",
                        f"Immediate Actions Required: {case['immediate_action']}",
                        f"Preventive Measures: {case['prevention']}",
                        "Critical Considerations: Special attention needed "
                        f"for {damage_type} in {case['location']}",
                    ])

                    # Blank lines between sections match the splitter's first
                    # separator, so chunks break on section boundaries first.
                    doc_text = "\n\n".join(
                        (technical_info, repair_info, safety_info)
                    )

                    # Metadata is read back by get_similar_cases.
                    metadata = {
                        'damage_type': damage_type,
                        'severity': case['severity'],
                        'location': case['location'],
                        'document_type': 'construction_damage_analysis',
                    }

                    documents.append(Document(
                        page_content=doc_text,
                        metadata=metadata,
                    ))

            logger.info("Created %d documents from knowledge base", len(documents))
            return documents
        except Exception as e:
            logger.error("Error creating documents: %s", e)
            raise

    def initialize_knowledge_base(self, knowledge_base: Dict):
        """Initialize vector store with construction knowledge.

        Builds documents from ``knowledge_base``, splits them into chunks and
        stores the resulting FAISS index on ``self.vector_store``.

        Args:
            knowledge_base: See ``_create_documents`` for the expected shape.

        Raises:
            ValueError: If the knowledge base yields nothing to index.
            Exception: Any other error is logged and re-raised.
        """
        try:
            documents = self._create_documents(knowledge_base)
            splits = self.text_splitter.split_documents(documents)

            # Fail with a clear message instead of handing an empty list to
            # the vector store.
            if not splits:
                raise ValueError("Knowledge base produced no documents to index")

            self.vector_store = FAISS.from_documents(
                documents=splits,
                embedding=self.embeddings,
            )

            logger.info("Knowledge base initialized successfully")
        except Exception as e:
            logger.error("Error initializing knowledge base: %s", e)
            raise

    def _format_response(
        self,
        docs: List["Document"],
        damage_type: str,
        confidence: float,
    ) -> Dict[str, List[str]]:
        """Format retrieved documents into structured response.

        Args:
            docs: Retrieved chunks; only ``page_content`` is read.
            damage_type: Damage label echoed into each technical entry.
            confidence: Value printed with one decimal and a ``%`` sign.

        Returns:
            A dict with the keys ``technical_details``,
            ``safety_considerations`` and ``expert_recommendations``. Each
            list gets one entry per chunk that contains the matching section
            heading.

        Raises:
            Exception: Any error is logged and re-raised.
        """
        response = {
            "technical_details": [],
            "safety_considerations": [],
            "expert_recommendations": []
        }

        try:
            for doc in docs:
                content = doc.page_content

                # Sections are cut at the full heading of the following
                # section, not at a bare word such as "Repair" or "Safety",
                # which can legitimately occur inside a field value.
                if self._TECHNICAL_HEADING in content:
                    technical = self._section_after(
                        content, self._TECHNICAL_HEADING, self._REPAIR_HEADING
                    )
                    response["technical_details"].append(
                        f"For {damage_type} (Confidence: {confidence:.1f}%):\n"
                        + technical
                    )

                if self._SAFETY_HEADING in content:
                    response["safety_considerations"].append(
                        self._section_after(content, self._SAFETY_HEADING)
                    )

                if self._REPAIR_HEADING in content:
                    response["expert_recommendations"].append(
                        self._section_after(
                            content, self._REPAIR_HEADING, self._SAFETY_HEADING
                        )
                    )

            return response
        except Exception as e:
            logger.error("Error formatting response: %s", e)
            raise

    def get_enhanced_analysis(
        self,
        damage_type: str,
        confidence: float,
        custom_query: str = None
    ) -> Dict[str, List[str]]:
        """Get enhanced analysis with optional custom query support.

        Args:
            damage_type: Damage label used in the query and the response.
            confidence: Confidence value used in the default query and the
                response.
            custom_query: Optional free-text question; when falsy a default
                analysis prompt is used instead.

        Returns:
            The structured response from ``_format_response``. This method
            never raises: on any failure (including an uninitialized vector
            store) it logs the error and returns the same three keys filled
            with fallback messages.
        """
        try:
            if self.vector_store is None:
                raise ValueError("Vector store not initialized")

            if custom_query:
                query = f"{custom_query} for {damage_type} damage"
            else:
                query = f"""
                Provide detailed analysis for {damage_type} damage with {confidence}% confidence level.
                Include technical assessment, safety considerations, and repair recommendations.
                """

            # fetch_k is passed through unchanged; LangChain's FAISS wrapper
            # documents it as the candidate pool size used before filtering,
            # so it does not add result diversity by itself.
            docs = self.vector_store.similarity_search(
                query=query,
                k=3,
                fetch_k=5
            )

            return self._format_response(docs, damage_type, confidence)

        except Exception as e:
            logger.error("Error getting enhanced analysis: %s", e)
            return {
                "technical_details": [f"Error retrieving analysis: {str(e)}"],
                "safety_considerations": ["Please try again or contact support."],
                "expert_recommendations": ["System currently unavailable."]
            }

    def get_similar_cases(self, damage_type: str, confidence: float) -> List[Dict[str, Any]]:
        """Get similar damage cases for comparison.

        Args:
            damage_type: Damage label used in the query; hits carrying this
                same label are excluded from the result.
            confidence: Currently unused; kept for interface compatibility.

        Returns:
            A list of dicts with ``damage_type``, ``severity``, ``location``
            and ``details`` (the first 200 characters of the chunk followed
            by ``'...'``). Returns an empty list on any error.
        """
        try:
            if self.vector_store is None:
                raise ValueError("Vector store not initialized")

            query = f"Find similar cases of {damage_type} damage"
            # NOTE(review): only 3 hits are retrieved before same-type hits
            # are dropped, so the result may hold fewer than 3 entries or
            # none at all.
            docs = self.vector_store.similarity_search(query, k=3)

            return [
                {
                    'damage_type': doc.metadata['damage_type'],
                    'severity': doc.metadata['severity'],
                    'location': doc.metadata['location'],
                    'details': doc.page_content[:200] + '...',
                }
                for doc in docs
                if doc.metadata['damage_type'] != damage_type
            ]
        except Exception as e:
            logger.error("Error getting similar cases: %s", e)
            return []