from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import os
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)


class RAGSystem:
    """Retrieval-augmented generation over a construction-damage knowledge base.

    Embeds case documents with a sentence-transformers model, indexes them in
    FAISS, and answers queries through a RetrievalQA chain backed by a
    HuggingFace Hub LLM.
    """

    def __init__(self):
        """Initialize embeddings, text splitter, and the generation LLM.

        Raises:
            Exception: re-raised (after logging) if any component fails to load.
        """
        try:
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2"
            )
            self.vector_store = None
            # qa_chain is built in initialize_knowledge_base(); pre-declare it
            # so a premature get_enhanced_analysis() fails predictably instead
            # of raising AttributeError.
            self.qa_chain = None
            self.text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50,
            )
            # FIX: flan-t5 is a seq2seq (encoder-decoder) model, so the Hub
            # task is "text2text-generation". The original "text-generation"
            # task is for causal/decoder-only models and makes Hub calls fail.
            self.llm = HuggingFaceHub(
                repo_id="google/flan-t5-large",
                task="text2text-generation",
                model_kwargs={"temperature": 0.7, "max_length": 512},
            )
            logger.info("RAG system initialized successfully.")
        except Exception:
            logger.exception("Failed to initialize RAG system")
            raise  # bare raise preserves the original traceback

    def initialize_knowledge_base(self, knowledge_base):
        """Build the FAISS vector store and QA chain from a knowledge base.

        Args:
            knowledge_base: mapping of damage_type -> list of case dicts; each
                case must contain the fields checked by
                _validate_knowledge_base().

        Raises:
            ValueError: if the knowledge base is malformed or yields no
                documents.
        """
        try:
            self._validate_knowledge_base(knowledge_base)

            # Generate insights and case studies (helpers defined elsewhere
            # in this module).
            expert_insights = self._generate_expert_insights(knowledge_base)
            case_studies = self._generate_case_studies()

            documents = []
            for damage_type, cases in knowledge_base.items():
                for idx, case in enumerate(cases):
                    try:
                        relevant_insight = expert_insights.get(damage_type, "")
                        relevant_cases = case_studies.get(damage_type, "")
                        # NOTE(review): "Technical Details" repeats
                        # case['description'] — looks like it should come from
                        # a dedicated field; confirm the intended source.
                        doc_text = f"""
Damage Type: {damage_type}
Severity: {case['severity']}
Description: {case['description']}
Technical Details: {case['description']}
Expert Insight: {relevant_insight}
Case Studies: {relevant_cases}
Repair Methods: {', '.join(case['repair_method'])}
Cost Considerations: {case['estimated_cost']}
Implementation Timeline: {case['timeframe']}
Location Specifics: {case['location']}
Required Expertise Level: {case['required_expertise']}
Emergency Protocol: {case['immediate_action']}
Preventive Measures: {case['prevention']}
"""
                        documents.append(doc_text)
                    except KeyError as e:
                        # Defensive: validation should have caught this, but
                        # skip any case that still lacks a field rather than
                        # aborting the whole build.
                        logger.warning(
                            "Missing key %s in %s, case %d. Skipping.",
                            e, damage_type, idx + 1,
                        )

            if not documents:
                raise ValueError("No valid documents to process.")

            splits = self.text_splitter.create_documents(documents)
            self.vector_store = FAISS.from_documents(splits, self.embeddings)

            # Initialize QA chain over the freshly built retriever.
            self.qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(),
                chain_type_kwargs={"prompt": self._get_qa_prompt()},
            )
            logger.info("Knowledge base initialized successfully.")
        except Exception:
            logger.exception("Failed to initialize knowledge base")
            raise

    def _validate_knowledge_base(self, knowledge_base):
        """Validate that every case dict contains all required fields.

        Raises:
            ValueError: naming the first missing field encountered.
        """
        required_keys = [
            'severity', 'description', 'repair_method', 'estimated_cost',
            'timeframe', 'location', 'required_expertise',
            'immediate_action', 'prevention',
        ]
        for damage_type, cases in knowledge_base.items():
            for idx, case in enumerate(cases):
                for key in required_keys:
                    if key not in case:
                        # Build the message once instead of duplicating it in
                        # the log call and the exception.
                        msg = (f"Missing required field '{key}' in "
                               f"{damage_type}, case {idx + 1}")
                        logger.error(msg)
                        raise ValueError(msg)
        logger.info("Knowledge base validation passed.")

    def _get_qa_prompt(self):
        """Create the custom PromptTemplate used by the QA chain."""
        template = """
Context: {context}

Question: {question}

Provide a detailed analysis considering:
1. Technical aspects
2. Safety implications
3. Cost-benefit analysis
4. Long-term considerations
5. Best practices and recommendations

Answer: """
        return PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

    def get_enhanced_analysis(self, damage_type, confidence, custom_query=None):
        """Run the QA chain and categorize its answer.

        Args:
            damage_type: detected damage category name.
            confidence: detection confidence percentage (e.g. 0-100).
            custom_query: optional query that replaces the default one.

        Returns:
            dict with 'technical_details', 'safety_considerations' and
            'expert_recommendations' entries; all empty lists on failure or
            when the chain returns nothing.
        """
        empty_result = {
            "technical_details": [],
            "safety_considerations": [],
            "expert_recommendations": [],
        }
        try:
            if not self.vector_store:
                raise ValueError("Vector store is not initialized.")

            if not custom_query:
                base_query = f"""
Provide a comprehensive analysis for {damage_type} damage with {confidence}% confidence level.
Include technical assessment, safety implications, and expert recommendations.
"""
            else:
                base_query = custom_query

            # Get relevant documents and a synthesized answer.
            results = self.qa_chain.run(base_query)
            if not results:
                logger.warning("No results returned for query.")
                return dict(empty_result)

            # Process and categorize the response (extractor helpers defined
            # elsewhere in this module).
            return {
                "technical_details":
                    self._extract_technical_details(results, damage_type),
                "safety_considerations":
                    self._extract_safety_considerations(results),
                "expert_recommendations":
                    self._extract_recommendations(results, confidence),
            }
        except Exception:
            logger.exception("Failed to generate enhanced analysis")
            return dict(empty_result)