# smart / rag_utils.py
# (Hugging Face Space file header: uploaded by Shakir60, commit 1413086 verified,
#  commit message "Update rag_utils.py")
# rag_utils.py
import logging
from typing import Any, Dict, List, Optional

import numpy as np
import streamlit as st
import torch
from tqdm import tqdm

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
# Set up logging
# Module-level logger; basicConfig at INFO so initialization and error
# messages from RAGSystem are visible by default.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class RAGSystem:
    """Retrieval-augmented generation over a construction-damage knowledge base.

    Builds a FAISS vector store from structured damage cases and answers
    damage-analysis queries with formatted technical / safety / repair sections.
    """

    def __init__(self):
        """Initialize embeddings and the text splitter.

        The vector store itself is created lazily by initialize_knowledge_base().

        Raises:
            Exception: re-raised after logging if embedding-model or splitter
                setup fails.
        """
        try:
            self.embeddings = HuggingFaceEmbeddings(
                model_name="sentence-transformers/all-mpnet-base-v2",
                # Bug fix: the original called st.cuda.is_available(), but
                # streamlit has no `cuda` attribute -- GPU detection belongs
                # to torch.
                model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
            )
            self.vector_store = None  # populated by initialize_knowledge_base()
            self.text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=500,
                chunk_overlap=50,
                separators=["\n\n", "\n", ". ", ", ", " ", ""]
            )
            logger.info("RAG system initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing RAG system: {str(e)}")
            raise

    def _create_documents(self, knowledge_base: Dict) -> List["Document"]:
        """Create one Document per damage case with retrieval metadata.

        Args:
            knowledge_base: mapping of damage_type -> list of case dicts. Each
                case is expected to provide the keys 'severity', 'description',
                'location', 'required_expertise', 'repair_method' (list),
                'estimated_cost', 'timeframe', 'immediate_action', 'prevention'.

        Returns:
            List of Documents whose text combines the technical, repair and
            safety sections (the section headers are relied upon later by
            _format_response).

        Raises:
            Exception: re-raised after logging (e.g. KeyError on a missing
                case field).
        """
        documents = []
        try:
            for damage_type, cases in knowledge_base.items():
                for case in cases:
                    # Create a detailed document for each case
                    technical_info = f"""
                    Technical Analysis for {damage_type}:
                    Severity Level: {case['severity']}
                    Detailed Description: {case['description']}
                    Primary Location: {case['location']}
                    Required Expertise: {case['required_expertise']}
                    """
                    repair_info = f"""
                    Repair and Maintenance Information:
                    Repair Methods: {' -> '.join(case['repair_method'])}
                    Estimated Cost Range: {case['estimated_cost']}
                    Expected Timeframe: {case['timeframe']}
                    """
                    safety_info = f"""
                    Safety and Prevention Guidelines:
                    Immediate Actions Required: {case['immediate_action']}
                    Preventive Measures: {case['prevention']}
                    Critical Considerations: Special attention needed for {damage_type} in {case['location']}
                    """
                    # Combine all information
                    doc_text = f"{technical_info}\n{repair_info}\n{safety_info}"
                    # Create metadata for better retrieval
                    metadata = {
                        'damage_type': damage_type,
                        'severity': case['severity'],
                        'location': case['location'],
                        'document_type': 'construction_damage_analysis'
                    }
                    documents.append(Document(
                        page_content=doc_text,
                        metadata=metadata
                    ))
            logger.info(f"Created {len(documents)} documents from knowledge base")
            return documents
        except Exception as e:
            logger.error(f"Error creating documents: {str(e)}")
            raise

    def initialize_knowledge_base(self, knowledge_base: Dict):
        """Build the FAISS vector store from the construction knowledge base.

        Args:
            knowledge_base: same structure as accepted by _create_documents.

        Raises:
            Exception: re-raised after logging if document creation, splitting
                or FAISS indexing fails.
        """
        try:
            # Create documents, then chunk them for embedding
            documents = self._create_documents(knowledge_base)
            splits = self.text_splitter.split_documents(documents)
            # Create vector store
            self.vector_store = FAISS.from_documents(
                documents=splits,
                embedding=self.embeddings
            )
            logger.info("Knowledge base initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing knowledge base: {str(e)}")
            raise

    def _format_response(self, docs: List["Document"], damage_type: str, confidence: float) -> Dict[str, List[str]]:
        """Split retrieved document text into structured response sections.

        Relies on the literal section headers written by _create_documents
        ("Technical Analysis", "Repair and Maintenance", "Safety and
        Prevention") to slice each document's page_content.

        Args:
            docs: retrieved documents (only .page_content is read).
            damage_type: label echoed into the technical-details entries.
            confidence: percentage echoed into the technical-details entries.

        Returns:
            Dict with 'technical_details', 'safety_considerations' and
            'expert_recommendations' lists (possibly empty).

        Raises:
            Exception: re-raised after logging on unexpected content.
        """
        response = {
            "technical_details": [],
            "safety_considerations": [],
            "expert_recommendations": []
        }
        try:
            for doc in docs:
                content = doc.page_content
                # Parse technical details (everything up to the repair section)
                if "Technical Analysis" in content:
                    response["technical_details"].append(
                        f"For {damage_type} (Confidence: {confidence:.1f}%):\n" +
                        content.split("Technical Analysis")[1].split("Repair")[0].strip()
                    )
                # Parse safety considerations (tail of the document)
                if "Safety and Prevention" in content:
                    response["safety_considerations"].append(
                        content.split("Safety and Prevention")[1].strip()
                    )
                # Parse repair recommendations (between repair and safety sections)
                if "Repair and Maintenance" in content:
                    response["expert_recommendations"].append(
                        content.split("Repair and Maintenance")[1].split("Safety")[0].strip()
                    )
            return response
        except Exception as e:
            logger.error(f"Error formatting response: {str(e)}")
            raise

    def get_enhanced_analysis(
        self,
        damage_type: str,
        confidence: float,
        custom_query: Optional[str] = None
    ) -> Dict[str, List[str]]:
        """Retrieve and format an analysis for a detected damage type.

        Args:
            damage_type: damage label to analyse.
            confidence: detection confidence percentage (echoed into output).
            custom_query: optional user query; when given it replaces the
                default analysis prompt.

        Returns:
            Structured response dict from _format_response; on any failure a
            dict with user-facing error strings (never raises to the caller).
        """
        try:
            if not self.vector_store:
                raise ValueError("Vector store not initialized")
            # Prepare query
            if custom_query:
                query = f"{custom_query} for {damage_type} damage"
            else:
                query = f"""
                Provide detailed analysis for {damage_type} damage with {confidence}% confidence level.
                Include technical assessment, safety considerations, and repair recommendations.
                """
            # Get relevant documents
            docs = self.vector_store.similarity_search(
                query=query,
                k=3,  # Get top 3 most relevant documents
                fetch_k=5  # Fetch top 5 for better diversity
            )
            # Format and return response
            return self._format_response(docs, damage_type, confidence)
        except Exception as e:
            logger.error(f"Error getting enhanced analysis: {str(e)}")
            return {
                "technical_details": [f"Error retrieving analysis: {str(e)}"],
                "safety_considerations": ["Please try again or contact support."],
                "expert_recommendations": ["System currently unavailable."]
            }

    def get_similar_cases(self, damage_type: str, confidence: float) -> List[Dict[str, Any]]:
        """Retrieve comparable cases of *other* damage types.

        Args:
            damage_type: current damage label; matching-type results are
                filtered out so only genuinely different cases are returned.
            confidence: accepted for interface symmetry; not used in the query.

        Returns:
            Up to 3 case summaries (type, severity, location, 200-char
            preview); empty list on any failure (never raises to the caller).
        """
        try:
            if not self.vector_store:
                raise ValueError("Vector store not initialized")
            query = f"Find similar cases of {damage_type} damage"
            docs = self.vector_store.similarity_search(query, k=3)
            similar_cases = []
            for doc in docs:
                if doc.metadata['damage_type'] != damage_type:  # Avoid same damage type
                    similar_cases.append({
                        'damage_type': doc.metadata['damage_type'],
                        'severity': doc.metadata['severity'],
                        'location': doc.metadata['location'],
                        'details': doc.page_content[:200] + '...'  # First 200 chars
                    })
            return similar_cases
        except Exception as e:
            logger.error(f"Error getting similar cases: {str(e)}")
            return []