"""Retrieval-augmented generation (RAG) helper built on LangChain.

Defines :class:`RAGSystem`, which indexes a damage knowledge base in a FAISS
vector store and answers analysis queries through a ``RetrievalQA`` chain.
"""

import logging
import os
from typing import Any, Dict, List, Optional

from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Mapping of damage type -> list of case dicts (see RAGSystem._REQUIRED_CASE_KEYS).
KnowledgeBase = Dict[str, List[Dict[str, Any]]]


class RAGSystem:
    """Index a damage knowledge base and answer questions against it.

    Call :meth:`initialize_knowledge_base` before :meth:`get_enhanced_analysis`;
    until then ``vector_store`` and ``qa_chain`` are ``None``.
    """

    # Fields every case dict must provide; checked by _validate_knowledge_base.
    _REQUIRED_CASE_KEYS = (
        'severity',
        'description',
        'repair_method',
        'estimated_cost',
        'timeframe',
        'location',
        'required_expertise',
        'immediate_action',
        'prevention',
    )

    def __init__(self):
        """Create the embedding model, text splitter and LLM client.

        Raises:
            Exception: Whatever the underlying LangChain constructors raise;
                the error is logged and re-raised unchanged.
        """
        # Defined up front so later methods can test for "not initialized"
        # instead of tripping over a missing attribute.
        self.vector_store = None
        self.qa_chain = None
        try:
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2"
            )
            self.text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50,
            )
            # Initialize HuggingFace model for text generation.
            # FLAN-T5 is an encoder-decoder model served under the
            # "text2text-generation" task. With "text-generation" the legacy
            # HuggingFaceHub wrapper treats the response as prompt + completion
            # and drops the first len(prompt) characters, truncating answers.
            # NOTE(review): confirm against the installed langchain version.
            self.llm = HuggingFaceHub(
                repo_id="google/flan-t5-large",
                task="text2text-generation",
                model_kwargs={"temperature": 0.7, "max_length": 512},
            )
        except Exception as e:
            logger.error("Failed to initialize RAG system: %s", e)
            raise
        logger.info("RAG system initialized successfully.")

    def initialize_knowledge_base(self, knowledge_base: KnowledgeBase) -> None:
        """Initialize vector store with enhanced construction knowledge.

        Validates ``knowledge_base``, renders one text document per case,
        splits the documents, indexes them in FAISS and builds the QA chain.
        ``self.vector_store`` and ``self.qa_chain`` are only replaced once
        every step has succeeded.

        Args:
            knowledge_base: Mapping of damage type to a list of case dicts.

        Raises:
            ValueError: If a case lacks a required field or there are no
                cases at all.
            Exception: Any error raised while embedding or building the
                chain; it is logged and re-raised unchanged.
        """
        try:
            self._validate_knowledge_base(knowledge_base)

            # Add expert insights and case studies
            expert_insights = self._generate_expert_insights(knowledge_base)
            case_studies = self._generate_case_studies()

            documents = []
            for damage_type, cases in knowledge_base.items():
                insight = expert_insights.get(damage_type, "")
                studies = case_studies.get(damage_type, "")
                for case in cases:
                    documents.append(
                        self._format_case_document(damage_type, case, insight, studies)
                    )

            # An empty index cannot be built; fail with a clear message
            # instead of an obscure error from the vector store.
            if not documents:
                raise ValueError("Knowledge base contains no cases to index.")

            splits = self.text_splitter.create_documents(documents)
            vector_store = FAISS.from_documents(splits, self.embeddings)

            # Initialize QA chain
            qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=vector_store.as_retriever(),
                chain_type_kwargs={"prompt": self._get_qa_prompt()},
            )
        except Exception as e:
            logger.error("Failed to initialize knowledge base: %s", e)
            raise

        self.vector_store = vector_store
        self.qa_chain = qa_chain
        logger.info("Knowledge base initialized successfully.")

    @staticmethod
    def _format_case_document(
        damage_type: str,
        case: Dict[str, Any],
        expert_insight: str,
        case_studies: str,
    ) -> str:
        """Render one knowledge-base case as a newline-separated text document."""
        repair_methods = case['repair_method']
        # A bare string would otherwise be joined character by character, and
        # non-string items would make str.join raise.
        if not isinstance(repair_methods, str):
            repair_methods = ', '.join(str(method) for method in repair_methods)

        # Built line by line so no source-code indentation leaks into the
        # text that gets chunked and embedded.
        lines = [
            f"Damage Type: {damage_type}",
            f"Severity: {case['severity']}",
            f"Description: {case['description']}",
            # NOTE(review): reuses 'description', as the original did; the
            # knowledge base has no separate technical-details field.
            f"Technical Details: {case['description']}",
            f"Expert Insight: {expert_insight}",
            f"Case Studies: {case_studies}",
            f"Repair Methods: {repair_methods}",
            f"Cost Considerations: {case['estimated_cost']}",
            f"Implementation Timeline: {case['timeframe']}",
            f"Location Specifics: {case['location']}",
            f"Required Expertise Level: {case['required_expertise']}",
            f"Emergency Protocol: {case['immediate_action']}",
            f"Preventive Measures: {case['prevention']}",
            "Long-term Implications: Analysis of long-term structural integrity impact",
            "Environmental Factors: Consideration of environmental conditions",
        ]
        return "\n".join(lines)

    def _validate_knowledge_base(self, knowledge_base: KnowledgeBase) -> None:
        """Validate the structure of the knowledge base.

        Raises:
            ValueError: On the first case that lacks one of
                ``_REQUIRED_CASE_KEYS``.
        """
        for damage_type, cases in knowledge_base.items():
            for case in cases:
                for key in self._REQUIRED_CASE_KEYS:
                    if key not in case:
                        raise ValueError(
                            f"Missing required field '{key}' in {damage_type}"
                        )
        logger.info("Knowledge base validation passed.")

    def _get_qa_prompt(self) -> PromptTemplate:
        """Create a custom prompt template for the QA chain."""
        template = "\n".join(
            [
                "Context: {context}",
                "",
                "Question: {question}",
                "",
                "Provide a detailed analysis considering:",
                "1. Technical aspects",
                "2. Safety implications",
                "3. Cost-benefit analysis",
                "4. Long-term considerations",
                "5. Best practices and recommendations",
                "",
                "Answer:",
            ]
        )
        return PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

    def _generate_expert_insights(self, knowledge_base: KnowledgeBase) -> Dict[str, str]:
        """Generate expert insights for each damage type.

        Returns:
            One templated insight sentence per key of ``knowledge_base``.
        """
        return {
            damage_type: (
                f"Expert analysis for {damage_type} including latest research "
                "findings and industry best practices."
            )
            for damage_type in knowledge_base
        }

    def _generate_case_studies(self) -> Dict[str, str]:
        """Generate relevant case studies for each damage type.

        Returns:
            A fixed mapping of known damage types to a case-study blurb.
        """
        return {
            "spalling": "Case studies of successful spalling repairs in similar structures",
            "reinforcement_corrosion": "Examples of corrosion mitigation in harsh environments",
            "structural_crack": "Analysis of crack progression and successful interventions",
            "dampness": "Case studies of effective moisture control solutions",
            "no_damage": "Preventive maintenance success stories",
        }

    def get_enhanced_analysis(
        self,
        damage_type: str,
        confidence: float,
        custom_query: Optional[str] = None,
    ) -> Optional[Dict[str, List[str]]]:
        """Get enhanced analysis with dynamic content generation.

        Args:
            damage_type: Damage type to analyse; interpolated into the
                default query and the technical-details label.
            confidence: Confidence value, rendered as ``{confidence}%``.
            custom_query: Query to run instead of the default one. A falsy
                value (``None`` or ``""``) selects the default query.

        Returns:
            A dict with ``technical_details``, ``safety_considerations`` and
            ``expert_recommendations`` lists, or ``None`` if anything fails
            (including the knowledge base not being initialized). Failures
            are logged rather than raised.
        """
        try:
            if self.qa_chain is None:
                raise RuntimeError(
                    "Knowledge base is not initialized; "
                    "call initialize_knowledge_base() first."
                )

            if custom_query:
                query = custom_query
            else:
                query = (
                    f"Provide a comprehensive analysis for {damage_type} damage "
                    f"with {confidence}% confidence level. "
                    "Include technical assessment, safety implications, "
                    "and expert recommendations."
                )

            # Retrieve relevant documents and generate the answer.
            results = self.qa_chain.run(query)

            # Process and categorize the response
            return {
                "technical_details": self._extract_technical_details(results, damage_type),
                "safety_considerations": self._extract_safety_considerations(results),
                "expert_recommendations": self._extract_recommendations(results, confidence),
            }
        except Exception as e:
            # Best-effort API: callers get None; the traceback goes to the log.
            logger.exception("Failed to generate enhanced analysis: %s", e)
            return None

    def _extract_technical_details(self, results: str, damage_type: str) -> List[str]:
        """Return a heading for ``damage_type`` followed by the raw results."""
        return [f"Detailed technical analysis for {damage_type}", results]

    def _extract_safety_considerations(self, results: str) -> List[str]:
        """Return a fixed safety heading followed by the raw results."""
        return ["Safety analysis based on current conditions", results]

    def _extract_recommendations(self, results: str, confidence: float) -> List[str]:
        """Return a confidence-labelled heading followed by the raw results."""
        return [f"Prioritized recommendations based on {confidence}% confidence", results]