File size: 7,522 Bytes
593e566 dbfc49d 593e566 dbfc49d 593e566 dbfc49d 593e566 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import os
import logging
# Route INFO-and-above records through the root logger, prefixing each
# message with a timestamp and its severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
class RAGSystem:
    """Retrieval-augmented question answering over a damage knowledge base.

    Text documents are built from the knowledge base, split into chunks,
    embedded into a FAISS vector store and queried through a LangChain
    ``RetrievalQA`` chain backed by a Hugging Face Hub model.

    Typical use: construct the object, call ``initialize_knowledge_base``
    once, then call ``get_enhanced_analysis`` per query.
    """

    # Fields every case dict in the knowledge base must provide; they are
    # all interpolated into the indexed document text.
    _REQUIRED_CASE_FIELDS = (
        'severity',
        'description',
        'repair_method',
        'estimated_cost',
        'timeframe',
        'location',
        'required_expertise',
        'immediate_action',
        'prevention',
    )

    def __init__(self):
        """Create the embedding model, text splitter and LLM client.

        Raises:
            Exception: whatever the underlying LangChain constructors raise;
                the error is logged and re-raised unchanged.
        """
        try:
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2"
            )
            self.vector_store = None
            # Set by initialize_knowledge_base(); defined here so that a
            # query issued too early can be detected explicitly.
            self.qa_chain = None
            self.text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50,
            )
            # FLAN-T5 is a sequence-to-sequence model, so it must be called
            # with the "text2text-generation" task. With "text-generation"
            # the HuggingFaceHub wrapper drops the first len(prompt)
            # characters of the output, which cuts off T5 answers.
            self.llm = HuggingFaceHub(
                repo_id="google/flan-t5-large",
                task="text2text-generation",
                model_kwargs={"temperature": 0.7, "max_length": 512},
            )
            logging.info("RAG system initialized successfully.")
        except Exception as e:
            logging.error("Failed to initialize RAG system: %s", e)
            raise  # bare raise keeps the original traceback intact

    def initialize_knowledge_base(self, knowledge_base):
        """Build the vector store and QA chain from ``knowledge_base``.

        Args:
            knowledge_base: mapping of damage type name to a list of case
                dicts. Each case must contain every field listed in
                ``_REQUIRED_CASE_FIELDS``.

        Raises:
            ValueError: if a case lacks a required field, or if the
                knowledge base contains no cases at all.
            Exception: any error raised while splitting, embedding or
                building the chain; it is logged and re-raised unchanged.
        """
        try:
            self._validate_knowledge_base(knowledge_base)

            expert_insights = self._generate_expert_insights(knowledge_base)
            case_studies = self._generate_case_studies()

            documents = []
            for damage_type, cases in knowledge_base.items():
                # Damage types without a prepared entry fall back to "".
                relevant_insight = expert_insights.get(damage_type, "")
                relevant_cases = case_studies.get(damage_type, "")
                for case in cases:
                    documents.append(
                        self._build_document_text(
                            damage_type, case, relevant_insight, relevant_cases
                        )
                    )

            # Fail early with a clear message instead of letting the vector
            # store choke on an empty document list.
            if not documents:
                raise ValueError("Knowledge base contains no cases to index.")

            splits = self.text_splitter.create_documents(documents)
            self.vector_store = FAISS.from_documents(splits, self.embeddings)

            self.qa_chain = RetrievalQA.from_chain_type(
                llm=self.llm,
                chain_type="stuff",
                retriever=self.vector_store.as_retriever(),
                chain_type_kwargs={"prompt": self._get_qa_prompt()},
            )
            logging.info("Knowledge base initialized successfully.")
        except Exception as e:
            logging.error("Failed to initialize knowledge base: %s", e)
            raise

    def _build_document_text(self, damage_type, case, expert_insight, case_study):
        """Render one knowledge-base case as the text block that gets indexed."""
        repair_methods = case['repair_method']
        if isinstance(repair_methods, str):
            # Joining a bare string would split it into single characters.
            repair_text = repair_methods
        else:
            repair_text = ', '.join(str(method) for method in repair_methods)

        # NOTE(review): "Technical Details" repeats the description field,
        # as in the original code - confirm whether a separate field was
        # intended.
        lines = [
            "",
            f"Damage Type: {damage_type}",
            f"Severity: {case['severity']}",
            f"Description: {case['description']}",
            f"Technical Details: {case['description']}",
            f"Expert Insight: {expert_insight}",
            f"Case Studies: {case_study}",
            f"Repair Methods: {repair_text}",
            f"Cost Considerations: {case['estimated_cost']}",
            f"Implementation Timeline: {case['timeframe']}",
            f"Location Specifics: {case['location']}",
            f"Required Expertise Level: {case['required_expertise']}",
            f"Emergency Protocol: {case['immediate_action']}",
            f"Preventive Measures: {case['prevention']}",
            "Long-term Implications: Analysis of long-term structural integrity impact",
            "Environmental Factors: Consideration of environmental conditions",
            "",
        ]
        return "\n".join(lines)

    def _validate_knowledge_base(self, knowledge_base):
        """Check that every case carries all required fields.

        Raises:
            ValueError: naming the first missing field and its damage type.
        """
        for damage_type, cases in knowledge_base.items():
            for case in cases:
                for key in self._REQUIRED_CASE_FIELDS:
                    if key not in case:
                        raise ValueError(
                            f"Missing required field '{key}' in {damage_type}"
                        )
        logging.info("Knowledge base validation passed.")

    def _get_qa_prompt(self):
        """Return the prompt template used by the QA chain.

        The template takes two variables: ``context`` and ``question``.
        """
        template = "\n".join([
            "",
            "Context: {context}",
            "Question: {question}",
            "Provide a detailed analysis considering:",
            "1. Technical aspects",
            "2. Safety implications",
            "3. Cost-benefit analysis",
            "4. Long-term considerations",
            "5. Best practices and recommendations",
            "Answer:",
            "",
        ])
        return PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

    def _generate_expert_insights(self, knowledge_base):
        """Return a placeholder insight sentence for each damage type key."""
        return {
            damage_type: (
                f"Expert analysis for {damage_type} including latest research "
                "findings and industry best practices."
            )
            for damage_type in knowledge_base
        }

    def _generate_case_studies(self):
        """Return the fixed case-study blurb for each known damage type."""
        return {
            "spalling": "Case studies of successful spalling repairs in similar structures",
            "reinforcement_corrosion": "Examples of corrosion mitigation in harsh environments",
            "structural_crack": "Analysis of crack progression and successful interventions",
            "dampness": "Case studies of effective moisture control solutions",
            "no_damage": "Preventive maintenance success stories",
        }

    def get_enhanced_analysis(self, damage_type, confidence, custom_query=None):
        """Run the QA chain and wrap its answer into labelled sections.

        Args:
            damage_type: damage type name used in the default query.
            confidence: confidence value interpolated as ``{confidence}%``.
            custom_query: optional query text; when truthy it replaces the
                default query.

        Returns:
            A dict with keys ``technical_details``, ``safety_considerations``
            and ``expert_recommendations``, or ``None`` if the knowledge
            base has not been initialized or the chain call fails (the
            failure is logged).
        """
        # getattr also covers instances created without running __init__.
        qa_chain = getattr(self, "qa_chain", None)
        if qa_chain is None:
            logging.error(
                "Failed to generate enhanced analysis: knowledge base has not "
                "been initialized; call initialize_knowledge_base() first."
            )
            return None

        try:
            if custom_query:
                base_query = custom_query
            else:
                base_query = "\n".join([
                    "",
                    f"Provide a comprehensive analysis for {damage_type} damage "
                    f"with {confidence}% confidence level.",
                    "Include technical assessment, safety implications, and "
                    "expert recommendations.",
                    "",
                ])

            results = qa_chain.run(base_query)

            return {
                "technical_details": self._extract_technical_details(results, damage_type),
                "safety_considerations": self._extract_safety_considerations(results),
                "expert_recommendations": self._extract_recommendations(results, confidence),
            }
        except Exception as e:
            # Best-effort API: callers get None instead of an exception.
            logging.error("Failed to generate enhanced analysis: %s", e)
            return None

    def _extract_technical_details(self, results, damage_type):
        """Return a heading for the technical section followed by the raw answer."""
        return [f"Detailed technical analysis for {damage_type}", results]

    def _extract_safety_considerations(self, results):
        """Return a heading for the safety section followed by the raw answer."""
        return ["Safety analysis based on current conditions", results]

    def _extract_recommendations(self, results, confidence):
        """Return a heading for the recommendations followed by the raw answer."""
        return [f"Prioritized recommendations based on {confidence}% confidence", results]