# src/db/mongodb_store.py
from datetime import datetime
from typing import Any, Dict, List, Optional

from bson import ObjectId
from motor.motor_asyncio import AsyncIOMotorClient


class MongoDBStore:
    """Async MongoDB persistence layer for the chatbot.

    Wraps a Motor client and exposes helpers for four collections of the
    ``db_chatbot`` database: ``knowledge_base`` (document metadata),
    ``conversations``, ``chat_history`` (individual messages) and
    ``vector_metadata`` (vector chunk metadata).
    """

    def __init__(self, mongo_uri: str = "mongodb://localhost:27017"):
        """Initialize MongoDB connection.

        Args:
            mongo_uri (str): MongoDB connection URI.
        """
        self.client = AsyncIOMotorClient(mongo_uri)
        self.db = self.client.db_chatbot
        self.chat_history = self.db.chat_history
        self.conversations = self.db.conversations
        self.documents = self.db.knowledge_base

    # ------------------------------------------------------------------
    # Document-related methods
    # ------------------------------------------------------------------
    async def store_document(
        self,
        document_id: str,
        filename: str,
        content_type: str,
        file_size: int,
        url_path: str,
        source: str,
    ) -> str:
        """Store document metadata in MongoDB.

        Args:
            document_id (str): Identifier stored in the ``document_id`` field.
            filename (str): Original file name.
            content_type (str): Content type of the file.
            file_size (int): File size.
            url_path (str): Path/URL under which the file is reachable.
            source (str): Origin of the document.

        Returns:
            str: The ``document_id`` that was passed in.
        """
        document = {
            "document_id": document_id,
            "filename": filename,
            "content_type": content_type,
            "file_size": file_size,
            "url_path": url_path,
            "source": source,
            "upload_timestamp": datetime.now(),
        }
        await self.documents.insert_one(document)
        return document_id

    async def get_document(self, document_id: str) -> Optional[Dict]:
        """Retrieve a document by ID.

        Returns:
            Optional[Dict]: The stored document without its ``_id`` field,
            or None if no document has this ``document_id``.
        """
        return await self.documents.find_one(
            {"document_id": document_id},
            {"_id": 0},
        )

    async def get_all_documents(self) -> List[Dict]:
        """Retrieve the metadata fields of all stored documents.

        Returns:
            List[Dict]: One dict per document, ``_id`` excluded.
        """
        projection = {
            "_id": 0,
            "document_id": 1,
            "filename": 1,
            "content_type": 1,
            "file_size": 1,
            "url_path": 1,
            "upload_timestamp": 1,
            "source": 1,
        }
        cursor = self.documents.find({}, projection)
        return await cursor.to_list(length=None)

    async def delete_document(self, document_id: str) -> bool:
        """Delete a document from MongoDB.

        Returns:
            bool: True if a document was deleted.
        """
        result = await self.documents.delete_one({"document_id": document_id})
        return result.deleted_count > 0

    # ------------------------------------------------------------------
    # Conversation and chat history methods
    # ------------------------------------------------------------------
    async def find_existing_user(
        self,
        email: Optional[str],
        phone_number: Optional[str],
    ) -> Optional[str]:
        """Find an existing conversation by email or phone number.

        Empty/None identifiers are left out of the query. Querying
        ``{"email": None}`` would match every conversation that has no
        ``email`` field at all (the field is only stored when provided),
        and so could return an unrelated user's conversation.

        Args:
            email (Optional[str]): User's email.
            phone_number (Optional[str]): User's phone number.

        Returns:
            Optional[str]: Conversation ID if found, None otherwise (also
            None when neither identifier is provided).
        """
        criteria = []
        if email:
            criteria.append({"email": email})
        if phone_number:
            criteria.append({"phone_number": phone_number})
        if not criteria:
            return None

        result = await self.conversations.find_one({"$or": criteria})
        if not result:
            return None
        return result.get("conversation_id")

    async def create_conversation(
        self,
        conversation_id: str,
        metadata: Optional[Dict] = None,
        full_name: Optional[str] = None,
        email: Optional[str] = None,
        phone_number: Optional[str] = None,
    ) -> str:
        """Create a new conversation.

        Args:
            conversation_id (str): Unique conversation ID.
            metadata (Optional[Dict]): Additional metadata.
            full_name (Optional[str]): User's full name.
            email (Optional[str]): User's email.
            phone_number (Optional[str]): User's phone number.

        Returns:
            str: Conversation ID.
        """
        # One timestamp so created_at and last_updated start out identical.
        now = datetime.now()
        conversation: Dict[str, Any] = {
            "conversation_id": conversation_id,
            "created_at": now,
            "last_updated": now,
            "message_count": 0,
            "metadata": metadata or {},
        }

        # User information is only stored when provided (no null fields).
        if full_name:
            conversation["full_name"] = full_name
        if email:
            conversation["email"] = email
        if phone_number:
            conversation["phone_number"] = phone_number

        await self.conversations.insert_one(conversation)
        return conversation_id

    async def get_conversation_metadata(
        self,
        conversation_id: str,
    ) -> Optional[Dict]:
        """Get the full conversation record.

        Returns:
            Optional[Dict]: The conversation document with ``_id`` converted
            to a string, or None if the conversation does not exist.
        """
        result = await self.conversations.find_one(
            {"conversation_id": conversation_id}
        )
        if result:
            result["_id"] = str(result["_id"])
        return result

    async def update_conversation_metadata(
        self,
        conversation_id: str,
        metadata: Dict,
    ) -> bool:
        """Replace the conversation's metadata and bump ``last_updated``.

        Note that the ``metadata`` field is overwritten as a whole; keys not
        present in ``metadata`` are lost.

        Returns:
            bool: True if a conversation document was modified.
        """
        result = await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$set": {
                    "metadata": metadata,
                    "last_updated": datetime.now(),
                }
            },
        )
        return result.modified_count > 0

    async def store_message(
        self,
        conversation_id: str,
        query: str,
        response: str,
        context: List[str],
        sources: List[Dict],
        llm_provider: str,
    ) -> str:
        """Store one user/assistant exchange in MongoDB.

        Two documents are inserted into ``chat_history`` (user message first,
        then assistant message) and the conversation's ``last_updated`` /
        ``message_count`` are updated, creating the conversation record if it
        does not exist yet.

        Args:
            conversation_id (str): Conversation the messages belong to.
            query (str): User's message.
            response (str): Assistant's reply.
            context (List[str]): Context stored with both messages.
            sources (List[Dict]): Sources stored with both messages.
            llm_provider (str): Provider name stored with both messages.

        Returns:
            str: The inserted ID of the assistant message.
        """
        user_message = {
            "conversation_id": conversation_id,
            "timestamp": datetime.now(),
            "role": "user",
            "content": query,
            "query": query,  # Keep for backward compatibility
            "response": None,
            "context": context,
            "sources": sources,
            "llm_provider": llm_provider,
            "feedback": None,
            "rating": None,
        }
        await self.chat_history.insert_one(user_message)

        assistant_message = {
            "conversation_id": conversation_id,
            # Taken after the user insert so the reply sorts after the query.
            "timestamp": datetime.now(),
            "role": "assistant",
            "content": response,
            "query": None,
            "response": response,  # Keep for backward compatibility
            "context": context,
            "sources": sources,
            "llm_provider": llm_provider,
            "feedback": None,
            "rating": None,
        }
        result = await self.chat_history.insert_one(assistant_message)

        now = datetime.now()
        await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$set": {"last_updated": now},
                # Increment by 2 since we store both messages
                "$inc": {"message_count": 2},
                # If the upsert creates the conversation, give it the same
                # base fields create_conversation() would have written.
                "$setOnInsert": {"created_at": now, "metadata": {}},
            },
            upsert=True,
        )

        return str(result.inserted_id)

    async def get_conversation_history(self, conversation_id: str) -> List[Dict]:
        """Retrieve the complete conversation history, oldest first.

        Returns:
            List[Dict]: Raw message documents with ``_id`` converted to str.
        """
        # BSON dates have millisecond precision, so two messages can share a
        # timestamp; _id is used as a tie-breaker to keep the order stable.
        cursor = self.chat_history.find(
            {"conversation_id": conversation_id}
        ).sort([("timestamp", 1), ("_id", 1)])

        history = []
        async for document in cursor:
            document["_id"] = str(document["_id"])
            history.append(document)
        return history

    async def get_recent_messages(
        self,
        conversation_id: str,
        limit: int = 5,
    ) -> List[Dict]:
        """Get the most recent messages of a conversation, oldest first.

        Args:
            conversation_id (str): Conversation ID.
            limit (int): Number of user/assistant exchanges to return; up to
                ``limit * 2`` messages are fetched.

        Returns:
            List[Dict]: Formatted messages (see ``_format_message``) in
            chronological order. Empty when ``limit`` is not positive.
        """
        # A cursor limit of 0 means "no limit" and would return the whole
        # history, so non-positive limits are answered without a query.
        if limit <= 0:
            return []

        cursor = (
            self.chat_history.find({"conversation_id": conversation_id})
            .sort([("timestamp", -1), ("_id", -1)])
            # Multiply limit by 2 to account for user-assistant pairs
            .limit(limit * 2)
        )

        messages = [self._format_message(doc) async for doc in cursor]
        messages.reverse()
        return messages

    async def update_feedback(
        self,
        conversation_id: str,
        feedback: Optional[str],
        rating: Optional[int],
    ) -> bool:
        """Update feedback for a conversation.

        The feedback/rating is written to every message of the conversation.
        If any message was modified, ``last_feedback``, ``last_rating`` and
        ``formatted_rating`` are set inside the conversation's ``metadata``
        (other metadata keys are preserved) and ``last_updated`` is bumped.

        Args:
            conversation_id (str): Conversation ID.
            feedback (Optional[str]): Feedback text.
            rating (Optional[int]): Numeric rating.

        Returns:
            bool: True if at least one message was modified; False if neither
            feedback nor rating was given or nothing changed.
        """
        update_fields: Dict[str, Any] = {}
        formatted_rating: Optional[str] = None

        if feedback is not None:
            update_fields["feedback"] = feedback

        if rating is not None:
            # Function-local import kept from the original; presumably it
            # avoids an import cycle or import-time dependency - confirm.
            from config.config import settings

            formatted_rating = f"{rating}/{settings.MAX_RATING}"
            update_fields["rating"] = rating  # Store numeric value
            update_fields["formatted_rating"] = formatted_rating  # Formatted string

        if not update_fields:
            return False

        result = await self.chat_history.update_many(
            {"conversation_id": conversation_id},
            {"$set": update_fields},
        )
        updated = result.modified_count > 0

        if updated:
            now = datetime.now()
            # Dotted keys merge into the existing metadata sub-document
            # instead of replacing it, so metadata stored at creation time
            # survives a feedback update.
            await self.conversations.update_one(
                {"conversation_id": conversation_id},
                {
                    "$set": {
                        "metadata.last_feedback": now,
                        "metadata.last_rating": rating,
                        "metadata.formatted_rating": formatted_rating,
                        "last_updated": now,
                    }
                },
            )

        return updated

    async def get_messages_for_summary(
        self,
        conversation_id: str,
    ) -> List[Dict]:
        """Get messages in a format suitable for summarization.

        Returns:
            List[Dict]: Chronologically ordered dicts with the keys
            ``role``, ``content``, ``timestamp`` and ``sources``.
        """
        cursor = self.chat_history.find(
            {"conversation_id": conversation_id}
        ).sort([("timestamp", 1), ("_id", 1)])

        messages = []
        async for doc in cursor:
            formatted = self._format_message(doc)
            # For summary, we only need specific fields
            messages.append(
                {
                    "role": formatted["role"],
                    "content": formatted["content"],
                    "timestamp": formatted["timestamp"],
                    "sources": formatted["sources"],
                }
            )
        return messages

    def _format_message(self, doc: Dict) -> Dict:
        """Helper method to format message documents consistently.

        Documents without ``role``/``content`` fall back to the ``query`` /
        ``response`` fields: a document with a truthy ``query`` is treated
        as a user message, anything else as an assistant message.

        Args:
            doc (Dict): Raw ``chat_history`` document.

        Returns:
            Dict: Message with a fixed set of keys; ``_id`` is a string, or
            None when the document has no ``_id``.
        """
        return {
            "_id": str(doc["_id"]) if "_id" in doc else None,
            "conversation_id": doc.get("conversation_id"),
            "timestamp": doc.get("timestamp"),
            "role": doc.get("role", "user" if doc.get("query") else "assistant"),
            "content": doc.get(
                "content", doc.get("query") or doc.get("response", "")
            ),
            "context": doc.get("context", []),
            "sources": doc.get("sources", []),
            "llm_provider": doc.get("llm_provider"),
            "feedback": doc.get("feedback"),
            "rating": doc.get("rating"),
        }

    # ------------------------------------------------------------------
    # Vector store related methods
    # ------------------------------------------------------------------
    async def store_vector_metadata(
        self,
        document_id: str,
        chunk_id: str,
        metadata: Dict[str, Any],
    ) -> str:
        """Store vector chunk metadata.

        Returns:
            str: The inserted ID of the new ``vector_metadata`` record.
        """
        vector_metadata = {
            "document_id": document_id,
            "chunk_id": chunk_id,
            "metadata": metadata,
            "created_at": datetime.now(),
        }
        result = await self.db.vector_metadata.insert_one(vector_metadata)
        return str(result.inserted_id)

    async def get_vector_metadata(
        self,
        document_id: str,
    ) -> List[Dict]:
        """Get vector metadata for a document.

        Returns:
            List[Dict]: All ``vector_metadata`` records of the document,
            returned as stored (``_id`` is not converted or removed).
        """
        cursor = self.db.vector_metadata.find({"document_id": document_id})
        return await cursor.to_list(length=None)

    async def delete_vector_metadata(
        self,
        document_id: str,
    ) -> bool:
        """Delete vector metadata for a document.

        Returns:
            bool: True if at least one record was deleted.
        """
        result = await self.db.vector_metadata.delete_many(
            {"document_id": document_id}
        )
        return result.deleted_count > 0