chatbot-backend / src /db /mongodb_store.py
TalatMasood's picture
Log Google Drive documents in MongoDB, add the source of each document, and make chunks overlap in text.
acdfaa9
raw
history blame
11.8 kB
# src/db/mongodb_store.py
from motor.motor_asyncio import AsyncIOMotorClient
from datetime import datetime
from typing import List, Dict, Optional, Any
from bson import ObjectId
class MongoDBStore:
    """Asynchronous MongoDB persistence layer for the chatbot backend.

    Wraps a Motor client and exposes helpers for four collections of the
    ``db_chatbot`` database: ``knowledge_base`` (document metadata),
    ``conversations``, ``chat_history`` (individual messages) and
    ``vector_metadata`` (per-chunk metadata).

    All timestamps are produced with ``datetime.now()`` (naive datetimes).
    """

    def __init__(self, mongo_uri: str = "mongodb://localhost:27017"):
        """Create the Motor client and bind the collections used by the store.

        Args:
            mongo_uri: MongoDB connection string.
        """
        self.client = AsyncIOMotorClient(mongo_uri)
        self.db = self.client.db_chatbot
        self.chat_history = self.db.chat_history
        self.conversations = self.db.conversations
        self.documents = self.db.knowledge_base

    # ------------------------------------------------------------------
    # Document-related methods
    # ------------------------------------------------------------------
    async def store_document(
        self,
        document_id: str,
        filename: str,
        content_type: str,
        file_size: int,
        url_path: str,
        source: str
    ) -> str:
        """Store document metadata in MongoDB.

        Args:
            document_id: Application-level identifier of the document.
            filename: Name of the file.
            content_type: Content type of the file.
            file_size: Size of the file.
            url_path: Path/URL under which the document is reachable.
            source: Origin of the document.

        Returns:
            The ``document_id`` that was passed in.
        """
        await self.documents.insert_one({
            "document_id": document_id,
            "filename": filename,
            "content_type": content_type,
            "file_size": file_size,
            "url_path": url_path,
            "source": source,
            "upload_timestamp": datetime.now(),
        })
        return document_id

    async def get_document(self, document_id: str) -> Optional[Dict]:
        """Retrieve one document's metadata by ``document_id``.

        Returns:
            The stored document without its ``_id`` field, or ``None`` when
            no document matches.
        """
        return await self.documents.find_one(
            {"document_id": document_id},
            {"_id": 0},
        )

    async def get_all_documents(self) -> List[Dict]:
        """Retrieve the metadata of every stored document.

        Returns:
            A list of dicts restricted to the known metadata fields
            (``_id`` is excluded).
        """
        projection = {
            "_id": 0,
            "document_id": 1,
            "filename": 1,
            "content_type": 1,
            "file_size": 1,
            "url_path": 1,
            "upload_timestamp": 1,
            "source": 1,
        }
        cursor = self.documents.find({}, projection)
        return await cursor.to_list(length=None)

    async def delete_document(self, document_id: str) -> bool:
        """Delete a document's metadata.

        Returns:
            True if a document was deleted, False if none matched.
        """
        result = await self.documents.delete_one({"document_id": document_id})
        return result.deleted_count > 0

    # ------------------------------------------------------------------
    # User lookup
    # ------------------------------------------------------------------
    @staticmethod
    def _user_lookup_filter(
        email: Optional[str],
        phone_number: Optional[str]
    ) -> Optional[Dict[str, Any]]:
        """Build the ``$or`` filter for the identifiers actually supplied.

        Returns ``None`` when neither identifier is usable, so the caller can
        skip the query entirely.
        """
        conditions: List[Dict[str, Any]] = []
        if email:
            conditions.append({"email": email})
        if phone_number:
            conditions.append({"phone_number": phone_number})
        return {"$or": conditions} if conditions else None

    async def find_existing_user(
        self,
        email: Optional[str],
        phone_number: Optional[str]
    ) -> Optional[str]:
        """Find an existing conversation by the user's email or phone number.

        Missing identifiers (``None`` or empty) are left out of the query:
        a filter such as ``{"email": None}`` would match every conversation
        that simply has no ``email`` field and return an unrelated user.

        Args:
            email: User's email, if known.
            phone_number: User's phone number, if known.

        Returns:
            The matching conversation ID, or ``None`` when nothing matches or
            no identifier was supplied.
        """
        query = self._user_lookup_filter(email, phone_number)
        if query is None:
            return None
        result = await self.conversations.find_one(query)
        return result["conversation_id"] if result else None

    # ------------------------------------------------------------------
    # Conversation and chat history methods
    # ------------------------------------------------------------------
    async def create_conversation(
        self,
        conversation_id: str,
        metadata: Optional[Dict] = None,
        full_name: Optional[str] = None,
        email: Optional[str] = None,
        phone_number: Optional[str] = None
    ) -> str:
        """Create a new conversation record.

        Args:
            conversation_id: Unique conversation ID.
            metadata: Additional metadata; stored as ``{}`` when omitted.
            full_name: User's full name (stored only when truthy).
            email: User's email (stored only when truthy).
            phone_number: User's phone number (stored only when truthy).

        Returns:
            The ``conversation_id`` that was passed in.
        """
        # One timestamp so created_at and last_updated are exactly equal.
        now = datetime.now()
        conversation: Dict[str, Any] = {
            "conversation_id": conversation_id,
            "created_at": now,
            "last_updated": now,
            "message_count": 0,
            "metadata": metadata or {},
        }

        # Add user information only for the fields that were provided.
        optional_fields = {
            "full_name": full_name,
            "email": email,
            "phone_number": phone_number,
        }
        for key, value in optional_fields.items():
            if value:
                conversation[key] = value

        await self.conversations.insert_one(conversation)
        return conversation_id

    async def get_conversation_metadata(
        self,
        conversation_id: str
    ) -> Optional[Dict]:
        """Get the full conversation record.

        Returns:
            The conversation document with ``_id`` converted to ``str``, or
            ``None`` when the conversation does not exist.
        """
        result = await self.conversations.find_one(
            {"conversation_id": conversation_id}
        )
        if result:
            result["_id"] = str(result["_id"])
        return result

    async def update_conversation_metadata(
        self,
        conversation_id: str,
        metadata: Dict
    ) -> bool:
        """Replace the conversation's ``metadata`` and bump ``last_updated``.

        Note that the whole ``metadata`` sub-document is overwritten with the
        given dict; keys not present in ``metadata`` are dropped.

        Returns:
            True if a conversation document was modified.
        """
        result = await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$set": {
                    "metadata": metadata,
                    "last_updated": datetime.now(),
                }
            },
        )
        return result.modified_count > 0

    @staticmethod
    def _build_message(
        conversation_id: str,
        role: str,
        text: Optional[str],
        context: List[str],
        sources: List[Dict],
        llm_provider: str
    ) -> Dict[str, Any]:
        """Build one chat_history document, timestamped at call time.

        ``query``/``response`` mirror ``content`` for the matching role and
        are kept for backward compatibility with older readers.
        """
        is_user = role == "user"
        return {
            "conversation_id": conversation_id,
            "timestamp": datetime.now(),
            "role": role,
            "content": text,
            "query": text if is_user else None,
            "response": None if is_user else text,
            "context": context,
            "sources": sources,
            "llm_provider": llm_provider,
            "feedback": None,
            "rating": None,
        }

    async def store_message(
        self,
        conversation_id: str,
        query: str,
        response: str,
        context: List[str],
        sources: List[Dict],
        llm_provider: str
    ) -> str:
        """Store one exchange as a user message plus an assistant message.

        Also bumps the conversation's ``last_updated`` and increments its
        ``message_count`` by 2; the conversation record is created (upsert)
        when it does not exist yet.

        Args:
            conversation_id: Conversation the exchange belongs to.
            query: The user's message.
            response: The assistant's reply.
            context: Context passages stored alongside both messages.
            sources: Source descriptors stored alongside both messages.
            llm_provider: Name of the LLM provider that produced the reply.

        Returns:
            The inserted ``_id`` of the assistant message, as a string.
        """
        user_message = self._build_message(
            conversation_id, "user", query, context, sources, llm_provider
        )
        await self.chat_history.insert_one(user_message)

        # Built after the first insert so its timestamp is not earlier than
        # the user message's.
        assistant_message = self._build_message(
            conversation_id, "assistant", response, context, sources,
            llm_provider
        )
        result = await self.chat_history.insert_one(assistant_message)

        await self.conversations.update_one(
            {"conversation_id": conversation_id},
            {
                "$set": {"last_updated": datetime.now()},
                # Increment by 2 since we store both messages.
                "$inc": {"message_count": 2},
            },
            upsert=True,
        )
        return str(result.inserted_id)

    async def get_conversation_history(self, conversation_id: str) -> List[Dict]:
        """Retrieve the complete conversation history, oldest first.

        Returns:
            The raw message documents with ``_id`` converted to ``str``.
        """
        # BSON datetimes have millisecond precision, so a user/assistant pair
        # can share a timestamp; _id breaks the tie in insertion order.
        cursor = self.chat_history.find(
            {"conversation_id": conversation_id}
        ).sort([("timestamp", 1), ("_id", 1)])

        history = []
        async for document in cursor:
            document["_id"] = str(document["_id"])
            history.append(document)
        return history

    async def get_recent_messages(
        self,
        conversation_id: str,
        limit: int = 5
    ) -> List[Dict]:
        """Get the most recent messages of a conversation, oldest first.

        Args:
            conversation_id: Conversation to read.
            limit: Number of user/assistant *pairs* to return; up to
                ``limit * 2`` messages are fetched.

        Returns:
            Formatted messages in chronological order. A non-positive
            ``limit`` yields an empty list.
        """
        # MongoDB treats limit(0) as "no limit" and negative limits specially,
        # so handle non-positive values here instead of passing them through.
        if limit <= 0:
            return []

        cursor = (
            self.chat_history.find({"conversation_id": conversation_id})
            .sort([("timestamp", -1), ("_id", -1)])
            # Multiply limit by 2 to account for user-assistant pairs.
            .limit(limit * 2)
        )
        newest_first = [self._format_message(doc) async for doc in cursor]
        newest_first.reverse()
        return newest_first

    async def update_feedback(
        self,
        conversation_id: str,
        feedback: Optional[str],
        rating: Optional[int]
    ) -> bool:
        """Update feedback for every message of a conversation.

        When any message is modified, the feedback summary is also written to
        the conversation's ``metadata`` (``last_feedback``, ``last_rating``,
        ``formatted_rating``) without touching other metadata keys.

        Args:
            conversation_id: Conversation ID.
            feedback: Feedback text, or ``None`` to leave it unchanged.
            rating: Numeric rating, or ``None`` to leave it unchanged.

        Returns:
            True if at least one message document was modified; False when
            nothing was supplied or nothing changed.
        """
        update_fields: Dict[str, Any] = {}
        formatted_rating: Optional[str] = None

        if feedback is not None:
            update_fields["feedback"] = feedback
        if rating is not None:
            from config.config import settings
            formatted_rating = f"{rating}/{settings.MAX_RATING}"
            update_fields["rating"] = rating  # numeric value
            update_fields["formatted_rating"] = formatted_rating  # "n/max"

        if not update_fields:
            return False

        result = await self.chat_history.update_many(
            {"conversation_id": conversation_id},
            {"$set": update_fields},
        )
        updated = result.modified_count > 0

        if updated:
            # Dotted keys update only these metadata fields; replacing the
            # whole "metadata" document would discard whatever was stored
            # when the conversation was created.
            now = datetime.now()
            await self.conversations.update_one(
                {"conversation_id": conversation_id},
                {
                    "$set": {
                        "metadata.last_feedback": now,
                        "metadata.last_rating": rating,
                        "metadata.formatted_rating": formatted_rating,
                        "last_updated": now,
                    }
                },
            )
        return updated

    async def get_messages_for_summary(
        self,
        conversation_id: str
    ) -> List[Dict]:
        """Get messages in a reduced format suitable for summarization.

        Returns:
            Chronologically ordered dicts with only ``role``, ``content``,
            ``timestamp`` and ``sources``.
        """
        cursor = self.chat_history.find(
            {"conversation_id": conversation_id}
        ).sort([("timestamp", 1), ("_id", 1)])

        wanted = ("role", "content", "timestamp", "sources")
        messages = []
        async for doc in cursor:
            formatted = self._format_message(doc)
            messages.append({key: formatted[key] for key in wanted})
        return messages

    def _format_message(self, doc: Dict) -> Dict:
        """Normalize a chat_history document into a consistent shape.

        Handles legacy documents that lack ``role``/``content``: the role is
        inferred from whether ``query`` is set, and the content falls back to
        ``query``, then ``response``, then an empty string.
        """
        if "content" in doc:
            content = doc["content"]
        else:
            content = doc.get("query") or doc.get("response") or ""

        inferred_role = "user" if doc.get("query") else "assistant"

        return {
            "_id": str(doc["_id"]) if "_id" in doc else None,
            "conversation_id": doc.get("conversation_id"),
            "timestamp": doc.get("timestamp"),
            "role": doc.get("role", inferred_role),
            "content": content,
            "context": doc.get("context", []),
            "sources": doc.get("sources", []),
            "llm_provider": doc.get("llm_provider"),
            "feedback": doc.get("feedback"),
            "rating": doc.get("rating"),
        }

    # ------------------------------------------------------------------
    # Vector store related methods
    # ------------------------------------------------------------------
    async def store_vector_metadata(
        self,
        document_id: str,
        chunk_id: str,
        metadata: Dict[str, Any]
    ) -> str:
        """Store metadata for one vector chunk of a document.

        Returns:
            The inserted ``_id`` as a string.
        """
        result = await self.db.vector_metadata.insert_one({
            "document_id": document_id,
            "chunk_id": chunk_id,
            "metadata": metadata,
            "created_at": datetime.now(),
        })
        return str(result.inserted_id)

    async def get_vector_metadata(
        self,
        document_id: str
    ) -> List[Dict]:
        """Get all vector chunk metadata records for a document.

        Returns:
            The stored documents as returned by the driver (``_id`` is left
            as stored, not converted to ``str``).
        """
        cursor = self.db.vector_metadata.find({"document_id": document_id})
        return await cursor.to_list(length=None)

    async def delete_vector_metadata(
        self,
        document_id: str
    ) -> bool:
        """Delete all vector chunk metadata records for a document.

        Returns:
            True if at least one record was deleted.
        """
        result = await self.db.vector_metadata.delete_many(
            {"document_id": document_id}
        )
        return result.deleted_count > 0