|
|
|
from langchain.embeddings import HuggingFaceEmbeddings |
|
from langchain.vectorstores import FAISS |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.docstore.document import Document |
|
import logging |
|
from typing import List, Dict, Any, Optional
|
import numpy as np |
|
from tqdm import tqdm |
|
import streamlit as st

import torch
|
|
|
|
|
logging.basicConfig(level=logging.INFO) |
|
logger = logging.getLogger(__name__) |
|
|
|
class RAGSystem: |
|
def __init__(self): |
|
"""Initialize RAG system with custom embeddings and configurations""" |
|
try: |
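
            # "all-mpnet-base-v2" is a general-purpose sentence-transformer model;
            # embeddings run on the GPU when torch detects one, otherwise on the CPU.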
|
self.embeddings = HuggingFaceEmbeddings( |
|
model_name="sentence-transformers/all-mpnet-base-v2", |
|
                model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
|
) |
|
self.vector_store = None |
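
            # Split on paragraphs, then lines, sentences, clauses, and words, so that
            # 500-character chunks with 50 characters of overlap keep related details together.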
|
self.text_splitter = RecursiveCharacterTextSplitter( |
|
chunk_size=500, |
|
chunk_overlap=50, |
|
separators=["\n\n", "\n", ". ", ", ", " ", ""] |
|
) |
|
logger.info("RAG system initialized successfully") |
|
except Exception as e: |
|
logger.error(f"Error initializing RAG system: {str(e)}") |
|
raise |
|
|
|
def _create_documents(self, knowledge_base: Dict) -> List[Document]: |
|
"""Create documents from knowledge base with structured format""" |
|
documents = [] |
|
try: |
|
for damage_type, cases in knowledge_base.items(): |
|
for case in cases: |
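
                    # Render each case as three labelled sections (technical, repair,
                    # safety) so _format_response() can later route text by its header.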
|
|
|
technical_info = f""" |
|
Technical Analysis for {damage_type}: |
|
Severity Level: {case['severity']} |
|
Detailed Description: {case['description']} |
|
Primary Location: {case['location']} |
|
Required Expertise: {case['required_expertise']} |
|
""" |
|
|
|
repair_info = f""" |
|
Repair and Maintenance Information: |
|
Repair Methods: {' -> '.join(case['repair_method'])} |
|
Estimated Cost Range: {case['estimated_cost']} |
|
Expected Timeframe: {case['timeframe']} |
|
""" |
|
|
|
safety_info = f""" |
|
Safety and Prevention Guidelines: |
|
Immediate Actions Required: {case['immediate_action']} |
|
Preventive Measures: {case['prevention']} |
|
Critical Considerations: Special attention needed for {damage_type} in {case['location']} |
|
""" |
|
|
|
|
|
doc_text = f"{technical_info}\n{repair_info}\n{safety_info}" |
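
                    # Metadata stays attached to every chunk through splitting, so
                    # results can be traced back to a damage type, severity, and location.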
|
|
|
|
|
metadata = { |
|
'damage_type': damage_type, |
|
'severity': case['severity'], |
|
'location': case['location'], |
|
'document_type': 'construction_damage_analysis' |
|
} |
|
|
|
documents.append(Document( |
|
page_content=doc_text, |
|
metadata=metadata |
|
)) |
|
|
|
logger.info(f"Created {len(documents)} documents from knowledge base") |
|
return documents |
|
except Exception as e: |
|
logger.error(f"Error creating documents: {str(e)}") |
|
raise |
|
|
|
def initialize_knowledge_base(self, knowledge_base: Dict): |
|
"""Initialize vector store with construction knowledge""" |
|
try: |
|
|
|
documents = self._create_documents(knowledge_base) |
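
            # Break the structured documents into overlapping chunks sized for embedding.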
|
|
|
|
|
splits = self.text_splitter.split_documents(documents) |
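
            # Embed every chunk and build an in-memory FAISS similarity index over them.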
|
|
|
|
|
self.vector_store = FAISS.from_documents( |
|
documents=splits, |
|
embedding=self.embeddings |
|
) |
|
|
|
logger.info("Knowledge base initialized successfully") |
|
except Exception as e: |
|
logger.error(f"Error initializing knowledge base: {str(e)}") |
|
raise |
|
|
|
def _format_response(self, docs: List[Document], damage_type: str, confidence: float) -> Dict[str, List[str]]: |
|
"""Format retrieved documents into structured response""" |
|
response = { |
|
"technical_details": [], |
|
"safety_considerations": [], |
|
"expert_recommendations": [] |
|
} |
|
|
|
try: |
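
            # A retrieved chunk may contain one or more of the labelled sections;
            # match on the headers written by _create_documents() and slice out each part.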
|
for doc in docs: |
|
content = doc.page_content |
|
|
|
if "Technical Analysis" in content: |
|
response["technical_details"].append( |
|
f"For {damage_type} (Confidence: {confidence:.1f}%):\n" + |
|
content.split("Technical Analysis")[1].split("Repair")[0].strip() |
|
) |
|
|
|
|
|
if "Safety and Prevention" in content: |
|
response["safety_considerations"].append( |
|
content.split("Safety and Prevention")[1].strip() |
|
) |
|
|
|
|
|
if "Repair and Maintenance" in content: |
|
response["expert_recommendations"].append( |
|
content.split("Repair and Maintenance")[1].split("Safety")[0].strip() |
|
) |
|
|
|
return response |
|
except Exception as e: |
|
logger.error(f"Error formatting response: {str(e)}") |
|
raise |
|
|
|
def get_enhanced_analysis( |
|
self, |
|
damage_type: str, |
|
confidence: float, |
|
        custom_query: Optional[str] = None
|
) -> Dict[str, List[str]]: |
|
"""Get enhanced analysis with optional custom query support""" |
|
try: |
|
if not self.vector_store: |
|
raise ValueError("Vector store not initialized") |
|
|
|
|
|
if custom_query: |
|
query = f"{custom_query} for {damage_type} damage" |
|
else: |
|
query = f""" |
|
Provide detailed analysis for {damage_type} damage with {confidence}% confidence level. |
|
Include technical assessment, safety considerations, and repair recommendations. |
|
""" |
|
|
|
|
|
docs = self.vector_store.similarity_search( |
|
query=query, |
|
k=3, |
|
fetch_k=5 |
|
) |
|
|
|
|
|
return self._format_response(docs, damage_type, confidence) |
|
|
|
except Exception as e: |
|
logger.error(f"Error getting enhanced analysis: {str(e)}") |
|
return { |
|
"technical_details": [f"Error retrieving analysis: {str(e)}"], |
|
"safety_considerations": ["Please try again or contact support."], |
|
"expert_recommendations": ["System currently unavailable."] |
|
} |
|
|
|
def get_similar_cases(self, damage_type: str, confidence: float) -> List[Dict[str, Any]]: |
|
"""Get similar damage cases for comparison""" |
|
try: |
|
if not self.vector_store: |
|
raise ValueError("Vector store not initialized") |
|
|
|
query = f"Find similar cases of {damage_type} damage" |
|
docs = self.vector_store.similarity_search(query, k=3) |
|
|
|
similar_cases = [] |
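
            # Skip chunks for the query's own damage type so the comparison surfaces
            # related but distinct failure modes.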
|
for doc in docs: |
|
                if doc.metadata.get('damage_type') != damage_type:

                    similar_cases.append({

                        'damage_type': doc.metadata.get('damage_type'),

                        'severity': doc.metadata.get('severity'),

                        'location': doc.metadata.get('location'),

                        'details': doc.page_content[:200] + '...'

                    })
|
|
|
return similar_cases |
|
except Exception as e: |
|
logger.error(f"Error getting similar cases: {str(e)}") |
|
return [] |
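

# --- Example usage (illustrative sketch) ---
# The knowledge_base shape below is assumed from the fields _create_documents()
# reads for each case; the real application supplies its own data.
if __name__ == "__main__":
    sample_knowledge_base = {
        "crack": [
            {
                "severity": "moderate",
                "description": "Hairline cracking along the foundation wall",
                "location": "foundation",
                "required_expertise": "structural engineer",
                "repair_method": ["epoxy injection", "surface sealing"],
                "estimated_cost": "$500 - $1,500",
                "timeframe": "1-2 days",
                "immediate_action": "Monitor crack width and restrict heavy loading",
                "prevention": "Maintain drainage and control soil moisture"
            }
        ]
    }

    rag = RAGSystem()
    rag.initialize_knowledge_base(sample_knowledge_base)
    analysis = rag.get_enhanced_analysis("crack", confidence=87.5)
    print(analysis["technical_details"])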
|
|