File size: 6,948 Bytes
593e566 dbfc49d 593e566 3b1af89 593e566 3b1af89 593e566 3b1af89 593e566 3b1af89 593e566 3b1af89 593e566 3b1af89 593e566 3b1af89 593e566 3b1af89 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFaceHub
import os
import logging
# Configure root-logger output: timestamped, level-tagged messages at INFO and above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
class RAGSystem:
def __init__(self):
try:
# Initialize embeddings
self.embeddings = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-mpnet-base-v2"
)
self.vector_store = None
self.text_splitter = RecursiveCharacterTextSplitter(
chunk_size=500,
chunk_overlap=50
)
# Initialize HuggingFace model for text generation
self.llm = HuggingFaceHub(
repo_id="google/flan-t5-large",
task="text-generation",
model_kwargs={"temperature": 0.7, "max_length": 512}
)
logging.info("RAG system initialized successfully.")
except Exception as e:
logging.error(f"Failed to initialize RAG system: {str(e)}")
raise e
def initialize_knowledge_base(self, knowledge_base):
"""Initialize vector store with enhanced construction knowledge"""
try:
documents = []
# Validate knowledge base
self._validate_knowledge_base(knowledge_base)
# Generate insights and case studies
expert_insights = self._generate_expert_insights(knowledge_base)
case_studies = self._generate_case_studies()
for damage_type, cases in knowledge_base.items():
for idx, case in enumerate(cases):
try:
# Combine insights into document text
relevant_insight = expert_insights.get(damage_type, "")
relevant_cases = case_studies.get(damage_type, "")
doc_text = f"""
Damage Type: {damage_type}
Severity: {case['severity']}
Description: {case['description']}
Technical Details: {case['description']}
Expert Insight: {relevant_insight}
Case Studies: {relevant_cases}
Repair Methods: {', '.join(case['repair_method'])}
Cost Considerations: {case['estimated_cost']}
Implementation Timeline: {case['timeframe']}
Location Specifics: {case['location']}
Required Expertise Level: {case['required_expertise']}
Emergency Protocol: {case['immediate_action']}
Preventive Measures: {case['prevention']}
"""
documents.append(doc_text)
except KeyError as e:
logging.warning(f"Missing key {str(e)} in {damage_type}, case {idx + 1}. Skipping.")
if not documents:
raise ValueError("No valid documents to process.")
splits = self.text_splitter.create_documents(documents)
self.vector_store = FAISS.from_documents(splits, self.embeddings)
# Initialize QA chain
self.qa_chain = RetrievalQA.from_chain_type(
llm=self.llm,
chain_type="stuff",
retriever=self.vector_store.as_retriever(),
chain_type_kwargs={
"prompt": self._get_qa_prompt()
}
)
logging.info("Knowledge base initialized successfully.")
except Exception as e:
logging.error(f"Failed to initialize knowledge base: {str(e)}")
raise e
def _validate_knowledge_base(self, knowledge_base):
"""Validate the structure of the knowledge base."""
required_keys = ['severity', 'description', 'repair_method', 'estimated_cost', 'timeframe', 'location', 'required_expertise', 'immediate_action', 'prevention']
for damage_type, cases in knowledge_base.items():
for idx, case in enumerate(cases):
for key in required_keys:
if key not in case:
logging.error(f"Missing required field '{key}' in {damage_type}, case {idx + 1}")
raise ValueError(f"Missing required field '{key}' in {damage_type}, case {idx + 1}")
logging.info("Knowledge base validation passed.")
def _get_qa_prompt(self):
"""Create a custom prompt template for the QA chain"""
template = """
Context: {context}
Question: {question}
Provide a detailed analysis considering:
1. Technical aspects
2. Safety implications
3. Cost-benefit analysis
4. Long-term considerations
5. Best practices and recommendations
Answer:
"""
return PromptTemplate(
template=template,
input_variables=["context", "question"]
)
def get_enhanced_analysis(self, damage_type, confidence, custom_query=None):
"""Get enhanced analysis with dynamic content generation"""
try:
if not self.vector_store:
raise ValueError("Vector store is not initialized.")
if not custom_query:
base_query = f"""
Provide a comprehensive analysis for {damage_type} damage with {confidence}% confidence level.
Include technical assessment, safety implications, and expert recommendations.
"""
else:
base_query = custom_query
# Get relevant documents
results = self.qa_chain.run(base_query)
if not results:
logging.warning("No results returned for query.")
return {"technical_details": [], "safety_considerations": [], "expert_recommendations": []}
# Process and categorize the response
enhanced_info = {
"technical_details": self._extract_technical_details(results, damage_type),
"safety_considerations": self._extract_safety_considerations(results),
"expert_recommendations": self._extract_recommendations(results, confidence)
}
return enhanced_info
except Exception as e:
logging.error(f"Failed to generate enhanced analysis: {str(e)}")
return {"technical_details": [], "safety_considerations": [], "expert_recommendations": []} |