# NOTE: these import paths target the legacy (pre-0.1) LangChain API; in newer releases
# the same classes live in langchain_community / langchain_text_splitters.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import os
import logging

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

class RAGSystem:
    def __init__(self):
        try:
            # Initialize embeddings
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2"
            )
            self.vector_store = None
            self.text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50
            )
            # Initialize HuggingFace model for text generation
            self.llm = HuggingFaceHub(
                repo_id="google/flan-t5-large",
                task="text-generation",
                model_kwargs={"temperature": 0.7, "max_length": 512}
            )
            logging.info("RAG system initialized successfully.")
        except Exception as e:
            logging.error(f"Failed to initialize RAG system: {str(e)}")
            raise e

    def initialize_knowledge_base(self, knowledge_base):
        """Initialize vector store with enhanced construction knowledge"""
        try:
            documents = []
            # Validate knowledge base
            self._validate_knowledge_base(knowledge_base)
            
            # Generate insights and case studies
            expert_insights = self._generate_expert_insights(knowledge_base)
            case_studies = self._generate_case_studies()
            
            for damage_type, cases in knowledge_base.items():
                for idx, case in enumerate(cases):
                    try:
                        # Combine insights into document text
                        relevant_insight = expert_insights.get(damage_type, "")
                        relevant_cases = case_studies.get(damage_type, "")
                        
                        doc_text = f"""
                        Damage Type: {damage_type}
                        Severity: {case['severity']}
                        Description: {case['description']}
                        Technical Details: {case['description']}
                        Expert Insight: {relevant_insight}
                        Case Studies: {relevant_cases}
                        Repair Methods: {', '.join(case['repair_method'])}
                        Cost Considerations: {case['estimated_cost']}
                        Implementation Timeline: {case['timeframe']}
                        Location Specifics: {case['location']}
                        Required Expertise Level: {case['required_expertise']}
                        Emergency Protocol: {case['immediate_action']}
                        Preventive Measures: {case['prevention']}
                        """
                        documents.append(doc_text)
                    except KeyError as e:
                        logging.warning(f"Missing key {str(e)} in {damage_type}, case {idx + 1}. Skipping.")
            
            if not documents:
                raise ValueError("No valid documents to process.")
            
            splits = self.text_splitter.create_documents(documents)
            self.vector_store = FAISS.from_documents(splits, self.embeddings)
            
            # Initialize QA chain
            self.qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(),
                chain_type_kwargs={
                    "prompt": self._get_qa_prompt()
                }
            )
            logging.info("Knowledge base initialized successfully.")
        except Exception as e:
            logging.error(f"Failed to initialize knowledge base: {str(e)}")
            raise e

    def _validate_knowledge_base(self, knowledge_base):
        """Validate the structure of the knowledge base."""
        required_keys = ['severity', 'description', 'repair_method', 'estimated_cost', 'timeframe', 'location', 'required_expertise', 'immediate_action', 'prevention']
        for damage_type, cases in knowledge_base.items():
            for idx, case in enumerate(cases):
                for key in required_keys:
                    if key not in case:
                        logging.error(f"Missing required field '{key}' in {damage_type}, case {idx + 1}")
                        raise ValueError(f"Missing required field '{key}' in {damage_type}, case {idx + 1}")
        logging.info("Knowledge base validation passed.")

    def _get_qa_prompt(self):
        """Create a custom prompt template for the QA chain"""
        template = """
        Context: {context}
        
        Question: {question}
        
        Provide a detailed analysis considering:
        1. Technical aspects
        2. Safety implications
        3. Cost-benefit analysis
        4. Long-term considerations
        5. Best practices and recommendations
        
        Answer:
        """
        return PromptTemplate(
            template=template,
            input_variables=["context", "question"]
        )
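
    # The three `_extract_*` helpers used by `get_enhanced_analysis` are not defined
    # elsewhere in this file. The versions below are minimal keyword-based sketches:
    # they split the LLM response into sentences and bucket them by topic. A fuller
    # implementation might instead prompt the LLM for structured (e.g. JSON) output.
    def _extract_technical_details(self, results, damage_type):
        """Collect sentences that read as technical detail for the damage type (placeholder)."""
        keywords = ("technical", "structural", "material", damage_type.lower())
        sentences = [s.strip() for s in results.split('.') if s.strip()]
        return [s for s in sentences if any(k in s.lower() for k in keywords)]

    def _extract_safety_considerations(self, results):
        """Collect sentences mentioning safety or hazards (placeholder)."""
        keywords = ("safety", "hazard", "risk", "danger")
        sentences = [s.strip() for s in results.split('.') if s.strip()]
        return [s for s in sentences if any(k in s.lower() for k in keywords)]

    def _extract_recommendations(self, results, confidence):
        """Collect recommendation-style sentences, flagging low detection confidence (placeholder)."""
        keywords = ("recommend", "should", "best practice", "suggest")
        sentences = [s.strip() for s in results.split('.') if s.strip()]
        recommendations = [s for s in sentences if any(k in s.lower() for k in keywords)]
        if confidence < 50:
            recommendations.append(
                f"Detection confidence is only {confidence}%; verify the damage type on site before acting."
            )
        return recommendations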

    def get_enhanced_analysis(self, damage_type, confidence, custom_query=None):
        """Get enhanced analysis with dynamic content generation"""
        try:
            if not self.vector_store:
                raise ValueError("Vector store is not initialized.")

            if not custom_query:
                base_query = f"""
                Provide a comprehensive analysis for {damage_type} damage with {confidence}% confidence level.
                Include technical assessment, safety implications, and expert recommendations.
                """
            else:
                base_query = custom_query
            
            # Run the retrieval-augmented QA chain over the query
            results = self.qa_chain.run(base_query)
            if not results:
                logging.warning("No results returned for query.")
                return {"technical_details": [], "safety_considerations": [], "expert_recommendations": []}
            
            # Process and categorize the response
            enhanced_info = {
                "technical_details": self._extract_technical_details(results, damage_type),
                "safety_considerations": self._extract_safety_considerations(results),
                "expert_recommendations": self._extract_recommendations(results, confidence)
            }
            return enhanced_info
        except Exception as e:
            logging.error(f"Failed to generate enhanced analysis: {str(e)}")
            return {"technical_details": [], "safety_considerations": [], "expert_recommendations": []}