Spaces:
Sleeping
Sleeping
Update back.py
Browse files
back.py
CHANGED
@@ -1,298 +1,300 @@
|
|
1 |
-
import os
|
2 |
-
import logging
|
3 |
-
from typing import List, Dict, Any, Optional, Union
|
4 |
-
from dataclasses import dataclass
|
5 |
-
import torch
|
6 |
-
from sentence_transformers import SentenceTransformer
|
7 |
-
from langchain.vectorstores import FAISS
|
8 |
-
from langchain_core.embeddings import Embeddings
|
9 |
-
import google.generativeai as genai
|
10 |
-
from datetime import datetime
|
11 |
-
import json
|
12 |
-
|
13 |
-
@dataclass
|
14 |
-
class UserInfo:
|
15 |
-
"""User information for context"""
|
16 |
-
name: str
|
17 |
-
college: str
|
18 |
-
degree: str
|
19 |
-
year: int
|
20 |
-
career_goals: str
|
21 |
-
has_internship: bool
|
22 |
-
has_placement: bool
|
23 |
-
|
24 |
-
@dataclass
|
25 |
-
class ChatConfig:
|
26 |
-
"""Configuration for the chatbot"""
|
27 |
-
embedding_model_name: str = 'all-MiniLM-L6-v2'
|
28 |
-
device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
|
29 |
-
max_history: int = 3
|
30 |
-
gemini_api_key: str = "AIzaSyCAlM-YPVKl1qTnUwInWz9X5sNCmISPOr4" # Replace with your API key
|
31 |
-
log_file: str = "chat_history.txt"
|
32 |
-
user_data_file: str = "user_data.json"
|
33 |
-
|
34 |
-
# In the UserManager class, modify these methods:
|
35 |
-
class UserManager:
|
36 |
-
"""Manages user information storage and retrieval"""
|
37 |
-
def __init__(self, user_data_file: str):
|
38 |
-
self.user_data_file = user_data_file
|
39 |
-
self.ensure_file_exists()
|
40 |
-
|
41 |
-
def ensure_file_exists(self):
|
42 |
-
"""Create user data file if it doesn't exist"""
|
43 |
-
if not os.path.exists(self.user_data_file):
|
44 |
-
os.makedirs(os.path.dirname(self.user_data_file), exist_ok=True)
|
45 |
-
with open(self.user_data_file, 'w', encoding='utf-8') as f:
|
46 |
-
json.dump({}, f)
|
47 |
-
|
48 |
-
def save_user_info(self, user_info: UserInfo):
|
49 |
-
"""Save user information to JSON file"""
|
50 |
-
try:
|
51 |
-
# First ensure the file exists with valid JSON
|
52 |
-
self.ensure_file_exists()
|
53 |
-
|
54 |
-
# Read existing data
|
55 |
-
try:
|
56 |
-
with open(self.user_data_file, 'r', encoding='utf-8') as f:
|
57 |
-
data = json.load(f)
|
58 |
-
except json.JSONDecodeError:
|
59 |
-
data = {}
|
60 |
-
|
61 |
-
# Update data
|
62 |
-
data[user_info.name] = {
|
63 |
-
"college": user_info.college,
|
64 |
-
"degree": user_info.degree,
|
65 |
-
"year": user_info.year,
|
66 |
-
"career_goals": user_info.career_goals,
|
67 |
-
"has_internship": user_info.has_internship,
|
68 |
-
"has_placement": user_info.has_placement,
|
69 |
-
"last_updated": datetime.now().isoformat()
|
70 |
-
}
|
71 |
-
|
72 |
-
# Write back to file
|
73 |
-
with open(self.user_data_file, 'w', encoding='utf-8') as f:
|
74 |
-
json.dump(data, f, indent=4)
|
75 |
-
return True
|
76 |
-
except Exception as e:
|
77 |
-
logging.error(f"Error saving user info: {str(e)}")
|
78 |
-
return False
|
79 |
-
|
80 |
-
|
81 |
-
class ChatLogger:
|
82 |
-
"""Logger for chat interactions"""
|
83 |
-
def __init__(self, log_file: str):
|
84 |
-
self.log_file = log_file
|
85 |
-
|
86 |
-
def log_interaction(self, question: str, answer: str, user_info: Optional[UserInfo] = None):
|
87 |
-
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
88 |
-
with open(self.log_file, 'a', encoding='utf-8') as f:
|
89 |
-
user_context = ""
|
90 |
-
if user_info:
|
91 |
-
user_context = f"\nUser: {user_info.name} | College: {user_info.college} | Degree: {user_info.degree} | Year: {user_info.year} | Career Goals: {user_info.career_goals}"
|
92 |
-
f.write(f"\n[{timestamp}]{user_context}\nQ: {question}\nA: {answer}\n{'-'*50}")
|
93 |
-
|
94 |
-
class ChatMemory:
|
95 |
-
"""Manages chat history"""
|
96 |
-
def __init__(self, max_history: int = 3):
|
97 |
-
self.max_history = max_history
|
98 |
-
self.history = []
|
99 |
-
|
100 |
-
def add_interaction(self, question: str, answer: str):
|
101 |
-
self.history.append({"question": question, "answer": answer})
|
102 |
-
if len(self.history) > self.max_history:
|
103 |
-
self.history.pop(0)
|
104 |
-
|
105 |
-
def get_history(self) -> List[Dict[str, str]]:
|
106 |
-
return self.history
|
107 |
-
|
108 |
-
def clear_history(self):
|
109 |
-
self.history = []
|
110 |
-
|
111 |
-
class QuestionGenerator:
|
112 |
-
def __init__(self, api_key: str):
|
113 |
-
genai.configure(api_key=api_key)
|
114 |
-
self.generation_config = {
|
115 |
-
"temperature": 0.1,
|
116 |
-
"top_p": 0.95,
|
117 |
-
"max_output_tokens": 8192,
|
118 |
-
}
|
119 |
-
self.model = genai.GenerativeModel(
|
120 |
-
model_name="gemini-1.5-flash",
|
121 |
-
generation_config=self.generation_config,
|
122 |
-
safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
|
123 |
-
)
|
124 |
-
|
125 |
-
self.default_questions = [
|
126 |
-
"What are some other skills I should focus on to improve my chances?",
|
127 |
-
"What resources or platforms can help me in my career journey?",
|
128 |
-
"Are there any specific companies or organizations I should target for internships/placements?",
|
129 |
-
"What are some common interview questions asked for this career path?"
|
130 |
-
]
|
131 |
-
|
132 |
-
async def generate_questions(
|
133 |
-
self,
|
134 |
-
question: str,
|
135 |
-
answer: str,
|
136 |
-
user_info: Optional[UserInfo] = None
|
137 |
-
) -> List[str]:
|
138 |
-
"""Generate follow-up questions based on the conversation"""
|
139 |
-
try:
|
140 |
-
chat = self.model.start_chat(history=[])
|
141 |
-
prompt = f"""Generate 4 simple, practical follow-up questions, that a college student may ask, based on this conversation about career advice:
|
142 |
-
|
143 |
-
Question: {question}
|
144 |
-
Answer: {answer}
|
145 |
-
|
146 |
-
Focus the questions on:
|
147 |
-
1. Skills development (What skills are needed, how to improve)
|
148 |
-
2. Resources and platforms (Where to find internships, jobs, etc.)
|
149 |
-
3. Specific target companies/organizations
|
150 |
-
4. Common interview questions
|
151 |
-
|
152 |
-
Keep the language simple and student-friendly. Format each question on a new line.
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
"
|
176 |
-
"
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
{
|
206 |
-
|
207 |
-
|
208 |
-
-
|
209 |
-
- {
|
210 |
-
- {
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
return
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
|
|
298 |
return []
|
|
|
1 |
+
import os
|
2 |
+
import logging
|
3 |
+
from typing import List, Dict, Any, Optional, Union
|
4 |
+
from dataclasses import dataclass
|
5 |
+
import torch
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
from langchain.vectorstores import FAISS
|
8 |
+
from langchain_core.embeddings import Embeddings
|
9 |
+
import google.generativeai as genai
|
10 |
+
from datetime import datetime
|
11 |
+
import json
|
12 |
+
|
13 |
+
@dataclass
class UserInfo:
    """Profile of the student the assistant is talking to.

    The generated ``__init__`` accepts the fields positionally in the order
    they are declared below, so that order is part of the interface.
    """

    # Student's name; UserManager.save_user_info uses it as the record key.
    name: str
    # College the student attends.
    college: str
    # Degree programme the student is enrolled in.
    degree: str
    # Year of study (rendered as "Year {year}" in prompts and logs).
    year: int
    # Free-text description of what the student wants to achieve.
    career_goals: str
    # True when the student already has internship experience.
    has_internship: bool
    # True when the student already has a placement.
    has_placement: bool
|
23 |
+
|
24 |
+
@dataclass
class ChatConfig:
    """Configuration for the chatbot.

    All defaults are evaluated once, when this class body runs at import
    time; pass explicit values to the constructor to override them.
    """

    # Model name handed to SentenceTransformer.
    embedding_model_name: str = 'all-MiniLM-L6-v2'
    # Prefer the GPU when torch reports that CUDA is available.
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Number of question/answer pairs kept in ChatMemory.
    max_history: int = 3
    # The key is taken from the GEMINI_API_KEY environment variable when set.
    # SECURITY: the literal fallback is a credential committed to source
    # control. It is kept only so existing deployments keep working; it
    # should be revoked and removed once GEMINI_API_KEY is configured.
    gemini_api_key: str = os.environ.get(
        "GEMINI_API_KEY",
        "AIzaSyCAlM-YPVKl1qTnUwInWz9X5sNCmISPOr4",
    )
    # Text file that ChatLogger appends interactions to.
    log_file: str = "chat_history.txt"
    # JSON file in which UserManager stores user profiles.
    user_data_file: str = "user_data.json"
|
33 |
+
|
34 |
+
# User profile persistence (UserManager).
|
35 |
+
class UserManager:
    """Manages user information storage and retrieval.

    All profiles live in one JSON file that maps a user's name to a
    dictionary of their details.
    """

    def __init__(self, user_data_file: str):
        """Remember the storage path and create the file if it is missing.

        Args:
            user_data_file: Path of the JSON file; may be a bare filename.
        """
        self.user_data_file = user_data_file
        self.ensure_file_exists()

    def ensure_file_exists(self):
        """Create the user data file, holding ``{}``, if it doesn't exist."""
        if os.path.exists(self.user_data_file):
            return

        # A bare filename has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError, so only create the parent when there is one.
        parent_dir = os.path.dirname(self.user_data_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(self.user_data_file, 'w', encoding='utf-8') as f:
            json.dump({}, f)

    def save_user_info(self, user_info: UserInfo) -> bool:
        """Insert or overwrite the record stored under ``user_info.name``.

        Args:
            user_info: Profile to persist.

        Returns:
            True when the file was written, False when anything failed (the
            error is logged rather than raised).
        """
        try:
            # First ensure the file exists with valid JSON
            self.ensure_file_exists()

            # Read existing data; unparseable content is treated as empty.
            try:
                with open(self.user_data_file, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except json.JSONDecodeError:
                data = {}

            data[user_info.name] = {
                "college": user_info.college,
                "degree": user_info.degree,
                "year": user_info.year,
                "career_goals": user_info.career_goals,
                "has_internship": user_info.has_internship,
                "has_placement": user_info.has_placement,
                "last_updated": datetime.now().isoformat()
            }

            # Serialise before opening for writing, so a value that cannot
            # be encoded does not leave the existing file truncated.
            serialized = json.dumps(data, indent=4)
            with open(self.user_data_file, 'w', encoding='utf-8') as f:
                f.write(serialized)
            return True
        except Exception as e:
            # Deliberate best-effort boundary: callers only get a bool.
            logging.error(f"Error saving user info: {str(e)}")
            return False
|
79 |
+
|
80 |
+
|
81 |
+
class ChatLogger:
    """Appends question/answer exchanges to a plain-text log file."""

    def __init__(self, log_file: str):
        """Store the path of the file that entries are appended to."""
        self.log_file = log_file

    def log_interaction(self, question: str, answer: str, user_info: Optional[UserInfo] = None):
        """Append one timestamped entry to the log file.

        Args:
            question: Text written after ``Q:``.
            answer: Text written after ``A:``.
            user_info: When given (and truthy), a line describing the user
                is written between the timestamp and the question.
        """
        stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        with open(self.log_file, 'a', encoding='utf-8') as log:
            # The profile line is empty unless a user profile was supplied.
            profile_line = (
                f"\nUser: {user_info.name} | College: {user_info.college} | Degree: {user_info.degree} | Year: {user_info.year} | Career Goals: {user_info.career_goals}"
                if user_info
                else ""
            )
            # Each entry ends with a 50-dash separator and no trailing newline.
            log.write(f"\n[{stamp}]{profile_line}\nQ: {question}\nA: {answer}\n{'-'*50}")
|
93 |
+
|
94 |
+
class ChatMemory:
    """Keeps the most recent question/answer pairs of a conversation."""

    def __init__(self, max_history: int = 3):
        """Start with an empty history capped at ``max_history`` entries."""
        self.history = []
        self.max_history = max_history

    def add_interaction(self, question: str, answer: str):
        """Record one exchange, dropping the oldest entry when over the cap."""
        entry = {"question": question, "answer": answer}
        self.history.append(entry)
        over_limit = len(self.history) > self.max_history
        if over_limit:
            # Only one entry is added per call, so removing one is enough.
            del self.history[0]

    def get_history(self) -> List[Dict[str, str]]:
        """Return the stored exchanges, oldest first (the live list, not a copy)."""
        return self.history

    def clear_history(self):
        """Forget every stored exchange by rebinding to a new empty list."""
        self.history = []
|
110 |
+
|
111 |
+
class QuestionGenerator:
    """Suggests follow-up questions for a career-advice exchange via Gemini."""

    def __init__(self, api_key: str):
        """Configure the Gemini client, build the model and the fallback list.

        Args:
            api_key: Key passed to ``genai.configure``.
        """
        genai.configure(api_key=api_key)
        self.generation_config = {
            "temperature": 0.1,
            "top_p": 0.95,
            "max_output_tokens": 8192,
        }
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=self.generation_config,
            # Every listed harm category is set to BLOCK_NONE.
            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
        )

        # Returned whenever the model call fails or its reply is unusable.
        self.default_questions = [
            "What are some other skills I should focus on to improve my chances?",
            "What resources or platforms can help me in my career journey?",
            "Are there any specific companies or organizations I should target for internships/placements?",
            "What are some common interview questions asked for this career path?"
        ]

    async def generate_questions(
        self,
        question: str,
        answer: str,
        user_info: Optional[UserInfo] = None
    ) -> List[str]:
        """Generate follow-up questions based on the conversation.

        Args:
            question: The user's last question.
            answer: The answer that was given to it.
            user_info: Accepted for interface compatibility; not used here.

        Returns:
            The non-empty lines of the model reply when there are exactly
            four of them; otherwise a copy of ``default_questions``. Errors
            are logged and also produce the defaults.
        """
        # Imported locally so the block does not rely on a file-level import.
        import asyncio

        try:
            chat = self.model.start_chat(history=[])
            prompt = f"""Generate 4 simple, practical follow-up questions, that a college student may ask, based on this conversation about career advice:

Question: {question}
Answer: {answer}

Focus the questions on:
1. Skills development (What skills are needed, how to improve)
2. Resources and platforms (Where to find internships, jobs, etc.)
3. Specific target companies/organizations
4. Common interview questions

Keep the language simple and student-friendly. Format each question on a new line.

NOTE: YOU MUST STRICTLY REPLY IN HINGLISH"""

            # send_message blocks until the reply arrives; running it in the
            # default executor keeps the event loop free while waiting.
            loop = asyncio.get_running_loop()
            reply = await loop.run_in_executor(None, chat.send_message, prompt)

            # Extract questions: one per non-blank line.
            questions = [line.strip() for line in reply.text.split('\n') if line.strip()]

            # Return default questions if we don't get exactly 4 valid questions
            if len(questions) != 4:
                return list(self.default_questions)

            return questions

        except Exception as e:
            logging.error(f"Error generating questions: {str(e)}")
            # A copy, so callers cannot mutate the shared fallback list.
            return list(self.default_questions)
|
170 |
+
|
171 |
+
class GeminiRAG:
    """Answers questions with Gemini using retrieved document context."""

    def __init__(self, api_key: str):
        """Configure the Gemini client and build the generative model.

        Args:
            api_key: Key passed to ``genai.configure``.
        """
        genai.configure(api_key=api_key)
        self.generation_config = {
            "temperature": 0.1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
        }
        self.model = genai.GenerativeModel(
            model_name="gemini-1.5-flash",
            generation_config=self.generation_config,
            # Every listed harm category is set to BLOCK_NONE.
            safety_settings={'HATE': 'BLOCK_NONE','HARASSMENT': 'BLOCK_NONE','SEXUAL' : 'BLOCK_NONE','DANGEROUS' : 'BLOCK_NONE'}
        )

    def create_context(self, relevant_docs: List[Dict[str, Any]]) -> str:
        """Creates a context string from relevant documents.

        Args:
            relevant_docs: Dicts with a ``"content"`` entry and a
                ``"metadata"`` dict holding ``"section"`` (the shape
                ``ProductDatabase.search`` returns).

        Returns:
            One ``Section: <name>`` block per document, separated by blank
            lines; an empty string for an empty list.

        Raises:
            KeyError: If a document lacks one of the keys read here.
        """
        return "\n\n".join(
            f"Section: {doc['metadata']['section']}\n{doc['content']}"
            for doc in relevant_docs
        )

    async def get_answer(
        self,
        question: str,
        context: str,
        user_info: Optional[UserInfo] = None
    ) -> str:
        """Ask the model for career advice grounded in ``context``.

        Args:
            question: The user's question.
            context: Text placed after ``Context:`` in the prompt.
            user_info: Optional profile; when given, a "User Background"
                block is added to the prompt.

        Returns:
            The model's reply text, or a fixed apology/error message when
            the reply is empty or any step fails. Never raises.
        """
        # Imported locally so the block does not rely on a file-level import.
        import asyncio

        try:
            chat = self.model.start_chat(history=[])

            # Built separately to avoid nesting triple-quoted f-strings; the
            # resulting prompt text is unchanged.
            background = ""
            if user_info:
                background = f"""User Background:
- Student at {user_info.college}
- Studying {user_info.degree} (Year {user_info.year})
- Goals: {user_info.career_goals}
- {'Has internship experience' if user_info.has_internship else 'No internship yet'}
- {'Has placement' if user_info.has_placement else 'Seeking placement'}"""

            # Simplified prompt to reduce chances of recitation
            prompt = f"""As a career counselor, provide a helpful response based on:

Context: {context}

{background}

Question: {question}

Provide practical advice with specific examples and actionable steps."""

            try:
                # send_message blocks until the reply arrives; running it in
                # the default executor keeps the event loop free meanwhile.
                loop = asyncio.get_running_loop()
                response = await loop.run_in_executor(None, chat.send_message, prompt)
                if response.text:
                    return response.text
                return "I apologize, but I couldn't generate a proper response. Please try rephrasing your question."
            except Exception as chat_error:
                logging.error(f"Chat error: {str(chat_error)}")
                return "I encountered an error while processing your question. Please try again with a simpler question."

        except Exception as e:
            # Reached when starting the chat or building the prompt fails.
            logging.error(f"Error generating answer: {str(e)}")
            return "An error occurred. Please try again later."
|
231 |
+
|
232 |
+
class CustomEmbeddings(Embeddings):
    """Embeddings implementation backed by a SentenceTransformer model."""

    def __init__(self, model_name: str, device: str):
        """Load ``model_name`` and move the model to ``device``."""
        self.model = SentenceTransformer(model_name)
        self.model.to(device)

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Encode ``texts`` and return one list of floats per input text."""
        with torch.no_grad():
            encoded = self.model.encode(texts, convert_to_tensor=True)
            # Bring the tensor to host memory before turning it into lists.
            as_array = encoded.cpu().numpy()
            return as_array.tolist()

    def embed_query(self, text: str) -> List[float]:
        """Encode a single text and return its vector as a list of floats."""
        with torch.no_grad():
            # Encoded as a one-element batch; the single row is unwrapped.
            encoded = self.model.encode([text], convert_to_tensor=True)
            rows = encoded.cpu().numpy().tolist()
            return rows[0]
|
247 |
+
|
248 |
+
class ProductDatabase:
    """Handles document storage and retrieval."""

    def __init__(self, config: ChatConfig):
        """Build the embedding model from ``config``; no index exists yet."""
        self.embeddings = CustomEmbeddings(
            model_name=config.embedding_model_name,
            device=config.device
        )
        # Set by process_markdown; search refuses to run while it is None.
        self.vectorstore = None

    @staticmethod
    def _split_markdown(markdown_content: str) -> List[Dict[str, str]]:
        """Split markdown into ``{"content", "section"}`` records per heading."""
        head, *rest = markdown_content.split('\n## ')
        documents = []

        if head.startswith('# '):
            # Everything after the title line is the introduction; partition
            # yields "" (instead of raising) when the title has no body.
            documents.append({
                "content": head.partition('\n')[2],
                "section": "Introduction"
            })
        elif head.startswith('## '):
            # The document opens directly with a section heading, which the
            # split above cannot strip; treat it like every other section.
            rest = [head[3:]] + rest

        for section in rest:
            if section.strip():
                # A heading with no body yields empty content rather than a
                # failed two-value unpack.
                title, _, content = section.partition('\n')
                documents.append({
                    "content": content.strip(),
                    "section": title.strip()
                })

        return documents

    def process_markdown(self, markdown_content: str):
        """Process markdown content and create vector store.

        The text after a leading ``# `` title becomes the "Introduction"
        document and every ``## `` section becomes a document named after
        its heading. Text before the first heading of an untitled document
        is not indexed.

        Raises:
            Exception: Wrapping any failure, with the original error chained.
        """
        try:
            documents = self._split_markdown(markdown_content)

            texts = [doc["content"] for doc in documents]
            metadatas = [{"section": doc["section"]} for doc in documents]

            self.vectorstore = FAISS.from_texts(
                texts=texts,
                embedding=self.embeddings,
                metadatas=metadatas
            )

        except Exception as e:
            raise Exception(f"Error processing markdown content: {str(e)}") from e

    def search(self, query: str, k: int = 3) -> List[Dict[str, Any]]:
        """Search for relevant documents.

        Args:
            query: Text to search for.
            k: Maximum number of documents requested from the store.

        Returns:
            Dicts with ``"content"`` and ``"metadata"`` entries; an empty
            list when the lookup itself fails (the error is logged).

        Raises:
            ValueError: If ``process_markdown`` has not built the store yet.
        """
        if self.vectorstore is None:
            raise ValueError("Database not initialized. Please process documents first.")

        try:
            docs = self.vectorstore.similarity_search(query, k=k)
            return [{"content": doc.page_content, "metadata": doc.metadata} for doc in docs]
        except Exception as e:
            # Deliberate best-effort: a failed lookup yields no context.
            logging.error(f"Error during search: {str(e)}")
            return []
|