Spaces:
Running
Running
File size: 11,895 Bytes
e9d730a d161383 e9d730a 4daad35 e9d730a 9700f95 4daad35 d161383 9700f95 d161383 4daad35 d161383 4daad35 d161383 4daad35 d161383 4daad35 d161383 4daad35 d161383 e9d730a 9700f95 b953016 9700f95 b953016 9700f95 b953016 9700f95 b953016 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 9700f95 e9d730a 4daad35 9700f95 4daad35 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 |
# src/db/mongodb_store.py
from motor.motor_asyncio import AsyncIOMotorClient
from datetime import datetime
from typing import List, Dict, Optional, Any
from bson import ObjectId
class MongoDBStore:
    """Async MongoDB persistence for documents, conversations and chat history.

    Collections used (all inside one database):
        knowledge_base  - uploaded document metadata
        conversations   - one record per conversation
        chat_history    - one record per chat message
        vector_metadata - per-chunk metadata for the vector store

    Timestamps are written with ``datetime.now()`` (naive local time).
    """

    def __init__(
        self,
        mongo_uri: str = "mongodb://localhost:27017",
        db_name: str = "db_chatbot"
    ):
        """
        Initialize MongoDB connection

        Args:
            mongo_uri (str): MongoDB connection URI
            db_name (str): Database name; defaults to the previously
                hard-coded "db_chatbot"
        """
        self.client = AsyncIOMotorClient(mongo_uri)
        self.db = self.client[db_name]
        self.chat_history = self.db.chat_history
        self.conversations = self.db.conversations
        self.documents = self.db.knowledge_base

    # Document-related methods
    async def store_document(
        self,
        document_id: str,
        filename: str,
        content_type: str,
        file_size: int,
        url_path: str
    ) -> str:
        """
        Store document metadata in MongoDB

        Args:
            document_id (str): Caller-supplied document identifier
            filename (str): Original file name
            content_type (str): Content type of the file
            file_size (int): File size
            url_path (str): Path/URL under which the file is reachable

        Returns:
            str: The document_id that was passed in
        """
        document = {
            "document_id": document_id,
            "filename": filename,
            "content_type": content_type,
            "file_size": file_size,
            "url_path": url_path,
            "upload_timestamp": datetime.now()
        }
        await self.documents.insert_one(document)
        return document_id

    async def get_document(self, document_id: str) -> Optional[Dict]:
        """Retrieve document by ID (without MongoDB's ``_id``), or None"""
        return await self.documents.find_one(
            {"document_id": document_id},
            {"_id": 0}
        )

    async def get_all_documents(self) -> List[Dict]:
        """Retrieve all documents, projected to the stored metadata fields"""
        cursor = self.documents.find(
            {},
            {
                "_id": 0,
                "document_id": 1,
                "filename": 1,
                "content_type": 1,
                "file_size": 1,
                "url_path": 1,
                "upload_timestamp": 1
            }
        )
        return await cursor.to_list(length=None)

    async def delete_document(self, document_id: str) -> bool:
        """Delete document from MongoDB; True if a record was removed"""
        result = await self.documents.delete_one({"document_id": document_id})
        return result.deleted_count > 0

    async def find_existing_user(
        self,
        email: str,
        phone_number: str
    ) -> Optional[str]:
        """
        Find existing user by email or phone number

        Args:
            email (str): User's email
            phone_number (str): User's phone number

        Returns:
            Optional[str]: Conversation ID if found, None otherwise
        """
        # Only search on identifiers that were actually supplied. A
        # {"email": None} clause would also match every conversation that
        # has no email field at all and return an unrelated conversation.
        criteria = []
        if email:
            criteria.append({"email": email})
        if phone_number:
            criteria.append({"phone_number": phone_number})
        if not criteria:
            return None

        result = await self.conversations.find_one({"$or": criteria})
        return result["conversation_id"] if result else None

    # Conversation and chat history methods
    async def create_conversation(
        self,
        conversation_id: str,
        metadata: Optional[Dict] = None,
        full_name: Optional[str] = None,
        email: Optional[str] = None,
        phone_number: Optional[str] = None
    ) -> str:
        """
        Create a new conversation

        Args:
            conversation_id (str): Unique conversation ID
            metadata (Optional[Dict]): Additional metadata
            full_name (Optional[str]): User's full name
            email (Optional[str]): User's email
            phone_number (Optional[str]): User's phone number

        Returns:
            str: Conversation ID
        """
        conversation = {
            "conversation_id": conversation_id,
            "created_at": datetime.now(),
            "last_updated": datetime.now(),
            "message_count": 0,
            "metadata": metadata or {}
        }
        # Add user information if provided; empty values are not stored
        if full_name:
            conversation["full_name"] = full_name
        if email:
            conversation["email"] = email
        if phone_number:
            conversation["phone_number"] = phone_number
        await self.conversations.insert_one(conversation)
        return conversation_id

    async def get_conversation_metadata(
        self,
        conversation_id: str
    ) -> Optional[Dict]:
        """Get the conversation record (``_id`` as a string), or None"""
        result = await self.conversations.find_one(
            {"conversation_id": conversation_id}
        )
        if result:
            result["_id"] = str(result["_id"])
        return result

    async def update_conversation_metadata(
        self,
        conversation_id: str,
        metadata: Dict
    ) -> bool:
        """
        Replace the conversation's metadata and bump ``last_updated``

        Args:
            conversation_id (str): Conversation ID
            metadata (Dict): New metadata; replaces the stored value entirely

        Returns:
            bool: True if a record was modified
        """
        result = await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$set": {
                    "metadata": metadata,
                    "last_updated": datetime.now()
                }
            }
        )
        return result.modified_count > 0

    async def store_message(
        self,
        conversation_id: str,
        query: str,
        response: str,
        context: List[str],
        sources: List[Dict],
        llm_provider: str
    ) -> str:
        """
        Store one exchange as a user message plus an assistant message

        Args:
            conversation_id (str): Conversation ID
            query (str): User's question
            response (str): Assistant's answer
            context (List[str]): Context attached to both messages
            sources (List[Dict]): Sources attached to both messages
            llm_provider (str): Provider name attached to both messages

        Returns:
            str: Inserted ID of the assistant message
        """
        # Store user message
        user_message = {
            "conversation_id": conversation_id,
            "timestamp": datetime.now(),
            "role": "user",
            "content": query,
            "query": query,  # Keep for backward compatibility
            "response": None,
            "context": context,
            "sources": sources,
            "llm_provider": llm_provider,
            "feedback": None,
            "rating": None
        }
        await self.chat_history.insert_one(user_message)

        # Store assistant message
        assistant_message = {
            "conversation_id": conversation_id,
            "timestamp": datetime.now(),
            "role": "assistant",
            "content": response,
            "query": None,
            "response": response,  # Keep for backward compatibility
            "context": context,
            "sources": sources,
            "llm_provider": llm_provider,
            "feedback": None,
            "rating": None
        }
        result = await self.chat_history.insert_one(assistant_message)

        # Update conversation metadata
        now = datetime.now()
        await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$set": {"last_updated": now},
                "$inc": {"message_count": 2},  # Increment by 2 since we store both messages
                # If the upsert creates the conversation, give it the same
                # created_at field that create_conversation writes
                "$setOnInsert": {"created_at": now}
            },
            upsert=True
        )
        return str(result.inserted_id)

    async def get_conversation_history(self, conversation_id: str) -> List[Dict]:
        """Retrieve complete conversation history, oldest first"""
        # BSON dates keep millisecond precision, so the two messages of one
        # exchange can share a timestamp; _id breaks the tie in insert order
        cursor = self.chat_history.find(
            {"conversation_id": conversation_id}
        ).sort([("timestamp", 1), ("_id", 1)])
        history = []
        async for document in cursor:
            document["_id"] = str(document["_id"])
            history.append(document)
        return history

    async def get_recent_messages(
        self,
        conversation_id: str,
        limit: int = 5
    ) -> List[Dict]:
        """
        Get most recent messages from conversation

        Args:
            conversation_id (str): Conversation ID
            limit (int): Number of user-assistant pairs to return

        Returns:
            List[Dict]: Up to ``limit * 2`` formatted messages, oldest first
        """
        # Newest first for the limit; _id breaks timestamp ties (see
        # get_conversation_history) so pairs are not split inconsistently
        cursor = self.chat_history.find(
            {"conversation_id": conversation_id}
        ).sort([("timestamp", -1), ("_id", -1)]).limit(limit * 2)  # Multiply limit by 2 to account for user-assistant pairs
        messages = []
        async for doc in cursor:
            messages.append(self._format_message(doc))
        # Flip back to chronological order
        return list(reversed(messages))

    async def update_feedback(
        self,
        conversation_id: str,
        feedback: Optional[str],
        rating: Optional[int]
    ) -> bool:
        """
        Update feedback for a conversation

        The feedback/rating is written to every message of the conversation
        and a summary is merged into the conversation's metadata.

        Args:
            conversation_id (str): Conversation ID
            feedback (Optional[str]): Feedback text
            rating (Optional[int]): Numeric rating

        Returns:
            bool: True if update successful
        """
        update_fields: Dict[str, Any] = {}
        formatted_rating = None
        if feedback is not None:
            update_fields["feedback"] = feedback
        if rating is not None:
            # Imported lazily, as in the original code
            from config.config import settings
            formatted_rating = f"{rating}/{settings.MAX_RATING}"
            update_fields.update({
                "rating": rating,  # Store numeric value
                "formatted_rating": formatted_rating  # Store formatted string
            })
        if not update_fields:
            return False

        result = await self.chat_history.update_many(
            {"conversation_id": conversation_id},
            {"$set": update_fields}
        )
        updated = result.modified_count > 0

        # Also update conversation metadata
        if updated:
            now = datetime.now()
            # Dotted paths merge these keys into the existing metadata;
            # setting "metadata" itself would discard whatever
            # create_conversation stored there
            await self.conversations.update_one(
                {"conversation_id": conversation_id},
                {
                    "$set": {
                        "metadata.last_feedback": now,
                        "metadata.last_rating": rating,
                        "metadata.formatted_rating": formatted_rating,
                        "last_updated": now
                    }
                }
            )
        return updated

    async def get_messages_for_summary(
        self,
        conversation_id: str
    ) -> List[Dict]:
        """Get messages in format suitable for summarization, oldest first"""
        # _id breaks timestamp ties (see get_conversation_history)
        cursor = self.chat_history.find(
            {"conversation_id": conversation_id}
        ).sort([("timestamp", 1), ("_id", 1)])
        messages = []
        async for doc in cursor:
            formatted = self._format_message(doc)
            # For summary, we only need specific fields
            messages.append({
                'role': formatted['role'],
                'content': formatted['content'],
                'timestamp': formatted['timestamp'],
                'sources': formatted['sources']
            })
        return messages

    def _format_message(self, doc: Dict) -> Dict:
        """
        Helper method to format message documents consistently

        Records without "role"/"content" are handled by falling back to
        their "query"/"response" fields.
        """
        return {
            "_id": str(doc["_id"]) if "_id" in doc else None,
            "conversation_id": doc.get("conversation_id"),
            "timestamp": doc.get("timestamp"),
            # No explicit role: a record carrying a query counts as "user"
            "role": doc.get("role", "user" if doc.get("query") else "assistant"),
            "content": doc.get("content", doc.get("query") or doc.get("response", "")),
            "context": doc.get("context", []),
            "sources": doc.get("sources", []),
            "llm_provider": doc.get("llm_provider"),
            "feedback": doc.get("feedback"),
            "rating": doc.get("rating")
        }

    # Vector store related methods
    async def store_vector_metadata(
        self,
        document_id: str,
        chunk_id: str,
        metadata: Dict[str, Any]
    ) -> str:
        """
        Store vector chunk metadata

        Args:
            document_id (str): ID of the document the chunk belongs to
            chunk_id (str): Chunk identifier
            metadata (Dict[str, Any]): Metadata stored with the chunk

        Returns:
            str: Inserted ID of the new record
        """
        vector_metadata = {
            "document_id": document_id,
            "chunk_id": chunk_id,
            "metadata": metadata,
            "created_at": datetime.now()
        }
        result = await self.db.vector_metadata.insert_one(vector_metadata)
        return str(result.inserted_id)

    async def get_vector_metadata(
        self,
        document_id: str
    ) -> List[Dict]:
        """Get vector metadata for a document (records returned as stored)"""
        cursor = self.db.vector_metadata.find(
            {"document_id": document_id}
        )
        return await cursor.to_list(length=None)

    async def delete_vector_metadata(
        self,
        document_id: str
    ) -> bool:
        """Delete vector metadata for a document; True if anything was removed"""
        result = await self.db.vector_metadata.delete_many(
            {"document_id": document_id}
        )
        return result.deleted_count > 0