Create rag_utils.py
Browse files- rag_utils.py +73 -0
rag_utils.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain.embeddings import HuggingFaceEmbeddings
|
2 |
+
from langchain.vectorstores import FAISS
|
3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
+
import streamlit as st
|
5 |
+
|
6 |
+
class RAGSystem:
    """Retrieval-augmented generation helper over a construction-damage knowledge base.

    Flattens structured damage cases into text records, embeds them into a
    FAISS vector store, and serves similarity queries against that store.
    """

    def __init__(self):
        # Sentence-transformer model used to embed knowledge-base chunks.
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2"
        )
        # Populated by initialize_knowledge_base(); None until then.
        self.vector_store = None
        # 500-char chunks with 50-char overlap keep each case's labeled
        # fields mostly together within a single chunk.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
        )

    def initialize_knowledge_base(self, knowledge_base):
        """Build the FAISS vector store from structured damage cases.

        Args:
            knowledge_base: Mapping of damage-type name to a list of case
                dicts. Each case must provide the keys 'severity',
                'description', 'repair_method' (iterable of str),
                'estimated_cost', 'timeframe', 'location',
                'required_expertise', 'immediate_action' and 'prevention'.

        Raises:
            ValueError: If the knowledge base yields no documents (FAISS
                cannot build an index from an empty corpus).
            KeyError: If a case dict is missing one of the expected keys.
        """
        documents = []

        for damage_type, cases in knowledge_base.items():
            for case in cases:
                # Flatten each structured case into one labeled text record;
                # the field labels ("Severity:", ...) are what
                # get_enhanced_analysis() later matches on.
                doc_text = f"""
Damage Type: {damage_type}
Severity: {case['severity']}
Description: {case['description']}
Repair Methods: {', '.join(case['repair_method'])}
Cost: {case['estimated_cost']}
Timeframe: {case['timeframe']}
Location: {case['location']}
Required Expertise: {case['required_expertise']}
Immediate Action: {case['immediate_action']}
Prevention: {case['prevention']}
"""
                documents.append(doc_text)

        # Fail fast with a clear message instead of an opaque error from
        # FAISS when asked to index an empty corpus.
        if not documents:
            raise ValueError("knowledge_base contains no cases to index")

        splits = self.text_splitter.create_documents(documents)
        self.vector_store = FAISS.from_documents(splits, self.embeddings)

    def query_knowledge_base(self, query, k=3):
        """Return the k most similar knowledge-base chunks for *query*.

        Args:
            query: Free-text search string.
            k: Number of nearest chunks to return (default 3).

        Returns:
            List of langchain Document objects ordered by similarity.

        Raises:
            ValueError: If initialize_knowledge_base() has not been called.
        """
        # Explicit None check — do not rely on the truthiness of a
        # third-party vector-store object.
        if self.vector_store is None:
            raise ValueError("Vector store not initialized")

        return self.vector_store.similarity_search(query, k=k)

    def get_enhanced_analysis(self, damage_type, confidence, custom_query=None):
        """Retrieve and bucket knowledge-base context for a detected damage.

        Args:
            damage_type: Name of the detected damage class.
            confidence: Detection confidence, interpolated as a percentage
                into the default query text.
            custom_query: Optional query string overriding the default one.

        Returns:
            Dict with keys 'technical_details', 'safety_considerations' and
            'expert_recommendations', each a list of matching chunk texts.

        Raises:
            ValueError: If the vector store has not been initialized.
        """
        query = custom_query or f"What are the key considerations and recommendations for {damage_type} with {confidence}% confidence?"

        relevant_docs = self.query_knowledge_base(query)

        enhanced_info = {
            "technical_details": [],
            "safety_considerations": [],
            "expert_recommendations": [],
        }

        for doc in relevant_docs:
            content = doc.page_content
            lowered = content.lower()  # hoisted: matched against several keywords

            # NOTE(review): buckets are mutually exclusive and checked in this
            # order, so a chunk containing both "severity" and "repair" lands
            # only in technical_details. Confirm this ordering is intentional.
            if "severity" in lowered or "description" in lowered:
                enhanced_info["technical_details"].append(content)
            elif "immediate action" in lowered or "prevention" in lowered:
                enhanced_info["safety_considerations"].append(content)
            elif "repair" in lowered or "expertise" in lowered:
                enhanced_info["expert_recommendations"].append(content)

        return enhanced_info
|