chatbot-backend / src /utils /database_cleanup.py
TalatMasood's picture
Enhanced support for Excel files and added an endpoint that provides an optimized vector store and RAG for Excel.
b953016
raw
history blame
5.45 kB
# src/utils/database_cleanup.py
from typing import List, Dict
import chromadb
import shutil
from pathlib import Path
from src.utils.logger import logger
from config.config import settings
async def cleanup_chroma() -> List[str]:
    """
    Clean up ChromaDB vector store

    Deletes every collection, resets the client and finally removes the
    persistence directory at ``settings.CHROMA_PATH`` from disk.

    Returns:
        List[str]: Details of cleanup operations
    Raises:
        Exception: If any step fails; the original error is attached as the
            cause and its text is included in the message.
    """
    try:
        # reset() is refused by the client unless allow_reset=True is set
        client = chromadb.PersistentClient(
            path=settings.CHROMA_PATH,
            settings=chromadb.Settings(
                allow_reset=True,
                is_persistent=True,
            ),
        )

        # Depending on the installed chromadb release, list_collections()
        # yields either Collection objects or plain collection names, while
        # delete_collection() expects a name. Normalise to the name first.
        for entry in client.list_collections():
            name = entry if isinstance(entry, str) else entry.name
            client.delete_collection(name)

        # Reset client
        client.reset()

        # Remove persistence directory
        path = Path(settings.CHROMA_PATH)
        if path.exists():
            shutil.rmtree(path)

        return ["All vector store data cleared"]
    except Exception as e:
        # Chain explicitly so the underlying traceback stays attached
        raise Exception(f"ChromaDB cleanup failed: {str(e)}") from e
async def cleanup_mongodb(mongodb) -> List[str]:
    """
    Empty the MongoDB collections used by the application

    Every document is removed with ``delete_many({})``; the collections
    themselves are not dropped.

    Args:
        mongodb: MongoDB store instance exposing ``chat_history``,
            ``conversations``, ``documents``, ``knowledge_base`` and ``db``
    Returns:
        List[str]: One message per collection that was cleared, in order
    Raises:
        Exception: If any step fails, with the original error text in the
            message
    """
    # (attribute on the store, message recorded once it has been emptied)
    targets = (
        ("chat_history", "Cleared chat history"),
        ("conversations", "Cleared conversations"),
        ("documents", "Cleared document metadata"),
        ("knowledge_base", "Cleared knowledge base"),
    )
    report = []
    try:
        for attribute, message in targets:
            await getattr(mongodb, attribute).delete_many({})
            report.append(message)

        # The vector metadata collection is only cleared when the database
        # handle exposes it
        database = mongodb.db
        if hasattr(database, 'vector_metadata'):
            await database.vector_metadata.delete_many({})
            report.append("Cleared vector metadata")
    except Exception as e:
        raise Exception(f"MongoDB cleanup failed: {str(e)}")
    return report
async def cleanup_files(uploads_dir: Path = Path("uploads")) -> List[str]:
    """
    Clean up uploaded files

    Deletes every regular file directly inside the uploads directory.
    Subdirectories (and anything inside them) are left untouched, so the
    directory itself is only removed when nothing remains in it afterwards.

    Args:
        uploads_dir: Directory holding the uploaded files. Defaults to the
            relative path "uploads"; a plain string is accepted as well.
    Returns:
        List[str]: Details of cleanup operations
    """
    uploads_dir = Path(uploads_dir)
    details = []

    if not uploads_dir.exists():
        details.append("No uploads directory found")
        return details

    # Materialise (and sort) the listing before deleting anything so the
    # directory is not mutated while it is being scanned and the report
    # order is deterministic.
    for file in sorted(uploads_dir.glob('*')):
        if file.is_file():
            file.unlink()
            details.append(f"Deleted file: {file.name}")

    # Try to remove the directory itself; rmdir() only works on an empty one
    if not any(uploads_dir.iterdir()):
        uploads_dir.rmdir()
        details.append("Removed empty uploads directory")

    return details
async def _run_cleanup_step(label: str, step) -> Dict:
    """
    Await one cleanup coroutine and turn its outcome into a summary entry

    Args:
        label (str): Name of the step, used in the error log message
        step: Awaitable that resolves to the list of detail messages
    Returns:
        Dict: ``{"status": "success" | "error", "details": [...]}``
    """
    try:
        details = await step
    except Exception as e:
        # A failing step is logged and recorded, never propagated, so the
        # remaining steps still get their chance to run
        logger.error(f"Error cleaning {label}: {str(e)}")
        return {"status": "error", "details": [str(e)]}
    return {"status": "success", "details": details}


async def perform_cleanup(
    mongodb,
    include_files: bool = True
) -> Dict:
    """
    Perform comprehensive cleanup of all databases

    The steps run in a fixed order (ChromaDB, MongoDB, then uploaded files)
    and each one is isolated: a failure is recorded in the summary and the
    following steps are still executed.

    Args:
        mongodb: MongoDB store instance
        include_files (bool): Whether to also delete uploaded files
    Returns:
        Dict: Cleanup operation summary with keys "status" ("success",
            "partial_success" or "error"), "message" and "details" (one
            entry per step; a step that was not requested keeps the status
            "not_started")
    """
    cleanup_summary = {
        "chroma_db": {"status": "not_started", "details": []},
        "mongodb": {"status": "not_started", "details": []},
        "files": {"status": "not_started", "details": []},
    }

    try:
        # Clean ChromaDB
        cleanup_summary["chroma_db"] = await _run_cleanup_step(
            "ChromaDB", cleanup_chroma()
        )

        # Clean MongoDB
        cleanup_summary["mongodb"] = await _run_cleanup_step(
            "MongoDB", cleanup_mongodb(mongodb)
        )

        # Clean files if requested
        if include_files:
            cleanup_summary["files"] = await _run_cleanup_step(
                "files", cleanup_files()
            )

        # Determine overall status from the steps that actually ran. Steps
        # left at "not_started" (files, when not requested) are excluded;
        # otherwise a run in which every attempted step failed could never
        # be reported as "error".
        attempted = [
            item["status"]
            for item in cleanup_summary.values()
            if item["status"] != "not_started"
        ]
        failures = attempted.count("error")
        if failures == 0:
            overall_status = "success"
        elif failures == len(attempted):
            overall_status = "error"
        else:
            overall_status = "partial_success"

        return {
            "status": overall_status,
            "message": "Cleanup operation completed",
            "details": cleanup_summary
        }
    except Exception as e:
        logger.error(f"Error in cleanup operation: {str(e)}")
        raise