# smart / rag_utils.py
# Shakir60's picture
# Create rag_utils.py
# d186f2c verified
# raw
# history blame
# 3.04 kB
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
import streamlit as st
class RAGSystem:
    """Retrieval helper that indexes damage case records in a FAISS store.

    Case records are rendered to text, split into chunks, embedded with a
    HuggingFace embedding model and stored in a FAISS vector store so that
    they can later be retrieved by similarity search.
    """

    def __init__(
        self,
        model_name="sentence-transformers/all-mpnet-base-v2",
        chunk_size=500,
        chunk_overlap=50,
    ):
        """Create the embedding model and the text splitter.

        Args:
            model_name: Name passed to ``HuggingFaceEmbeddings``.
            chunk_size: ``chunk_size`` passed to the text splitter.
            chunk_overlap: ``chunk_overlap`` passed to the text splitter.
        """
        self.embeddings = HuggingFaceEmbeddings(model_name=model_name)
        # Stays None until initialize_knowledge_base() has built the index;
        # query_knowledge_base() checks for this.
        self.vector_store = None
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )

    @staticmethod
    def _format_case(damage_type, case):
        """Render one case record as the text block that gets embedded.

        Args:
            damage_type: Knowledge-base key the case is filed under.
            case: Mapping with the keys ``severity``, ``description``,
                ``repair_method``, ``estimated_cost``, ``timeframe``,
                ``location``, ``required_expertise``, ``immediate_action``
                and ``prevention``.

        Returns:
            A newline-delimited ``Label: value`` block with a leading and a
            trailing newline.

        Raises:
            KeyError: If ``case`` lacks one of the keys listed above.
        """
        repair_methods = case['repair_method']
        # Joining a plain string would interleave ", " between its
        # characters, so only join real collections of methods.
        if not isinstance(repair_methods, str):
            repair_methods = ', '.join(repair_methods)
        lines = [
            f"Damage Type: {damage_type}",
            f"Severity: {case['severity']}",
            f"Description: {case['description']}",
            f"Repair Methods: {repair_methods}",
            f"Cost: {case['estimated_cost']}",
            f"Timeframe: {case['timeframe']}",
            f"Location: {case['location']}",
            f"Required Expertise: {case['required_expertise']}",
            f"Immediate Action: {case['immediate_action']}",
            f"Prevention: {case['prevention']}",
        ]
        return "\n" + "\n".join(lines) + "\n"

    def initialize_knowledge_base(self, knowledge_base):
        """Initialize vector store with construction knowledge.

        Args:
            knowledge_base: Mapping of damage type to an iterable of case
                mappings (see ``_format_case`` for the required keys).

        Raises:
            ValueError: If ``knowledge_base`` contains no cases at all.
            KeyError: If a case lacks a required key.
        """
        documents = []
        for damage_type, cases in knowledge_base.items():
            documents.extend(
                self._format_case(damage_type, case) for case in cases
            )
        # Fail early with a clear message rather than handing the vector
        # store an empty document list.
        if not documents:
            raise ValueError("Knowledge base contains no cases to index")
        splits = self.text_splitter.create_documents(documents)
        self.vector_store = FAISS.from_documents(splits, self.embeddings)

    def query_knowledge_base(self, query, k=3):
        """Query the vector store for relevant information.

        Args:
            query: Text to search for.
            k: Number of results requested from the similarity search.

        Returns:
            Whatever ``similarity_search`` returns for ``query``; callers in
            this class only rely on each result having ``page_content``.

        Raises:
            ValueError: If the vector store has not been initialized.
        """
        if self.vector_store is None:
            raise ValueError("Vector store not initialized")
        return self.vector_store.similarity_search(query, k=k)

    def get_enhanced_analysis(self, damage_type, confidence, custom_query=None):
        """Get enhanced analysis based on damage type and confidence.

        Args:
            damage_type: Damage label inserted into the default query.
            confidence: Confidence value inserted into the default query,
                rendered as ``{confidence}%``.
            custom_query: Optional query that replaces the default one when
                it is non-empty.

        Returns:
            A dict with the keys ``technical_details``,
            ``safety_considerations`` and ``expert_recommendations``, each a
            list of retrieved chunk texts. A chunk is filed under the first
            category whose keywords it contains; chunks matching no keyword
            are omitted.

        Raises:
            ValueError: If the vector store has not been initialized.
        """
        if custom_query:
            query = custom_query
        else:
            query = f"What are the key considerations and recommendations for {damage_type} with {confidence}% confidence?"
        relevant_docs = self.query_knowledge_base(query)

        enhanced_info = {
            "technical_details": [],
            "safety_considerations": [],
            "expert_recommendations": []
        }
        for doc in relevant_docs:
            content = doc.page_content
            lowered = content.lower()  # computed once for all keyword checks
            # NOTE: categories are mutually exclusive and checked in this
            # order. Rendered records begin with the Severity/Description
            # lines, so a chunk holding a whole record lands in
            # "technical_details".
            if "severity" in lowered or "description" in lowered:
                enhanced_info["technical_details"].append(content)
            elif "immediate action" in lowered or "prevention" in lowered:
                enhanced_info["safety_considerations"].append(content)
            elif "repair" in lowered or "expertise" in lowered:
                enhanced_info["expert_recommendations"].append(content)
        return enhanced_info