# src/utils/database_cleanup.py
import shutil
from pathlib import Path
from typing import Awaitable, Dict, List

import chromadb

from src.utils.logger import logger
from config.config import settings


async def cleanup_chroma() -> List[str]:
    """Clean up the ChromaDB vector store.

    Deletes every collection, resets the client and removes the
    persistence directory at ``settings.CHROMA_PATH``.

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any step fails; the original error is chained as the cause.
    """
    try:
        # reset() below is only called on a client created with allow_reset=True
        client = chromadb.PersistentClient(
            path=settings.CHROMA_PATH,
            settings=chromadb.Settings(
                allow_reset=True,
                is_persistent=True,
            ),
        )

        # Depending on the installed chromadb version, list_collections()
        # yields either collection names or Collection objects; normalise
        # to a name so delete_collection() works with both.
        for entry in client.list_collections():
            name = entry if isinstance(entry, str) else entry.name
            client.delete_collection(name)

        # Reset client
        client.reset()

        # Remove persistence directory
        path = Path(settings.CHROMA_PATH)
        if path.exists():
            shutil.rmtree(path)

        return ["All vector store data cleared"]
    except Exception as e:
        raise Exception(f"ChromaDB cleanup failed: {str(e)}") from e


async def cleanup_mongodb(mongodb) -> List[str]:
    """Clean up MongoDB collections.

    Removes every document from the known collections; the collections
    themselves are emptied with ``delete_many({})``, not dropped.

    Args:
        mongodb: MongoDB store instance exposing ``chat_history``,
            ``conversations``, ``documents``, ``knowledge_base`` and ``db``.

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any delete fails; the original error is chained as the
            cause. Collections cleared before the failure stay cleared.
    """
    details = []
    try:
        # Empty each collection in turn, recording progress as we go
        await mongodb.chat_history.delete_many({})
        details.append("Cleared chat history")

        await mongodb.conversations.delete_many({})
        details.append("Cleared conversations")

        await mongodb.documents.delete_many({})
        details.append("Cleared document metadata")

        await mongodb.knowledge_base.delete_many({})
        details.append("Cleared knowledge base")

        # NOTE(review): if mongodb.db creates collection handles on attribute
        # access, this check is always true -- confirm against the store class.
        if hasattr(mongodb.db, 'vector_metadata'):
            await mongodb.db.vector_metadata.delete_many({})
            details.append("Cleared vector metadata")

        return details
    except Exception as e:
        raise Exception(f"MongoDB cleanup failed: {str(e)}") from e


async def cleanup_files() -> List[str]:
    """Clean up uploaded files.

    Deletes the regular files directly inside ``uploads`` (relative to the
    current working directory). Subdirectories are left untouched, and the
    ``uploads`` directory itself is removed only if it ends up empty.

    Returns:
        List[str]: Details of cleanup operations
    """
    details = []
    uploads_dir = Path("uploads")

    if not uploads_dir.exists():
        details.append("No uploads directory found")
        return details

    # Snapshot the listing before deleting anything
    for file in list(uploads_dir.glob('*')):
        if file.is_file():
            file.unlink()
            details.append(f"Deleted file: {file.name}")

    # Try to remove the directory itself
    if not any(uploads_dir.iterdir()):
        uploads_dir.rmdir()
        details.append("Removed empty uploads directory")

    return details


async def _run_step(label: str, step: Awaitable[List[str]]) -> Dict:
    """Await one cleanup step and convert its outcome into a summary entry."""
    try:
        details = await step
    except Exception as e:
        logger.error(f"Error cleaning {label}: {str(e)}")
        return {"status": "error", "details": [str(e)]}
    return {"status": "success", "details": details}


async def perform_cleanup(
    mongodb,
    include_files: bool = True
) -> Dict:
    """Perform comprehensive cleanup of all databases.

    Each step runs independently: a failure in one is logged and recorded in
    the summary, and the remaining steps still run.

    Args:
        mongodb: MongoDB store instance
        include_files (bool): Whether to also delete uploaded files

    Returns:
        Dict: Cleanup operation summary with keys ``status`` ("success",
        "partial_success" or "error"), ``message`` and ``details``. A step
        that was skipped keeps the status "not_started".
    """
    cleanup_summary = {
        "chroma_db": {"status": "not_started", "details": []},
        "mongodb": {"status": "not_started", "details": []},
        "files": {"status": "not_started", "details": []}
    }

    try:
        cleanup_summary["chroma_db"] = await _run_step("ChromaDB", cleanup_chroma())
        cleanup_summary["mongodb"] = await _run_step("MongoDB", cleanup_mongodb(mongodb))

        # Clean files if requested
        if include_files:
            cleanup_summary["files"] = await _run_step("files", cleanup_files())

        # Only steps that actually ran count towards the overall status;
        # otherwise a skipped "files" step would hide a total failure.
        attempted = [
            item for item in cleanup_summary.values()
            if item["status"] != "not_started"
        ]
        failed = [item for item in attempted if item["status"] == "error"]

        if attempted and len(failed) == len(attempted):
            overall_status = "error"
        elif failed:
            overall_status = "partial_success"
        else:
            overall_status = "success"

        return {
            "status": overall_status,
            "message": "Cleanup operation completed",
            "details": cleanup_summary
        }
    except Exception as e:
        logger.error(f"Error in cleanup operation: {str(e)}")
        raise