Spaces:
Running
Running
File size: 5,452 Bytes
b953016 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
# src/utils/database_cleanup.py
from typing import List, Dict
import chromadb
import shutil
from pathlib import Path
from src.utils.logger import logger
from config.config import settings
async def cleanup_chroma() -> List[str]:
    """
    Clean up ChromaDB vector store

    Opens a persistent client on ``settings.CHROMA_PATH`` with resets
    enabled, deletes every collection, resets the client and finally removes
    the persistence directory from disk if it exists.

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any step fails. The message is prefixed with
            "ChromaDB cleanup failed: " and the original error is attached
            as the cause.
    """
    try:
        # reset() is refused by the client unless allow_reset=True
        client = chromadb.PersistentClient(
            path=settings.CHROMA_PATH,
            settings=chromadb.Settings(
                allow_reset=True,
                is_persistent=True,
            ),
        )

        # Depending on the installed chromadb release, list_collections()
        # yields either plain collection names or Collection objects, while
        # delete_collection() wants the name - normalise before deleting.
        for entry in client.list_collections():
            name = entry if isinstance(entry, str) else entry.name
            client.delete_collection(name)

        # Reset client
        client.reset()

        # Remove persistence directory
        path = Path(settings.CHROMA_PATH)
        if path.exists():
            shutil.rmtree(path)

        return ["All vector store data cleared"]
    except Exception as e:
        # Same exception type and message as before; chaining keeps the
        # original traceback available to whoever logs the failure.
        raise Exception(f"ChromaDB cleanup failed: {str(e)}") from e
async def cleanup_mongodb(mongodb) -> List[str]:
    """
    Clean up MongoDB collections

    Removes every document from the chat history, conversations, documents
    and knowledge base collections exposed on the store, in that order. The
    collections themselves are emptied, not dropped. If ``mongodb.db`` has a
    ``vector_metadata`` attribute, that collection is emptied as well.

    Args:
        mongodb: MongoDB store instance

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any step fails. The message is prefixed with
            "MongoDB cleanup failed: " and the original error is attached
            as the cause. Collections cleared before the failure stay
            cleared.
    """
    # (attribute on the store, message reported once it has been emptied)
    targets = (
        ("chat_history", "Cleared chat history"),
        ("conversations", "Cleared conversations"),
        ("documents", "Cleared document metadata"),
        ("knowledge_base", "Cleared knowledge base"),
    )
    details = []

    try:
        for attribute, message in targets:
            # Resolve the attribute only when its turn comes so a missing
            # collection does not prevent the earlier ones from being cleared.
            await getattr(mongodb, attribute).delete_many({})
            details.append(message)

        if hasattr(mongodb.db, 'vector_metadata'):
            await mongodb.db.vector_metadata.delete_many({})
            details.append("Cleared vector metadata")

        return details
    except Exception as e:
        # Same exception type and message as before; chaining keeps the
        # original error reachable through __cause__.
        raise Exception(f"MongoDB cleanup failed: {str(e)}") from e
async def cleanup_files(uploads_dir: str = "uploads") -> List[str]:
    """
    Clean up uploaded files

    Deletes every regular file directly inside the uploads directory, then
    removes the directory itself if nothing is left in it. Subdirectories
    are not touched, so the directory is kept whenever it contains one.

    Args:
        uploads_dir (str): Directory holding the uploaded files. Defaults to
            "uploads", resolved against the current working directory.
            Anything accepted by ``pathlib.Path`` works.

    Returns:
        List[str]: Details of cleanup operations
    """
    details = []
    root = Path(uploads_dir)

    # is_dir() rather than exists(): a regular file that happens to carry
    # the directory's name must not reach iterdir(), which would raise.
    if not root.is_dir():
        details.append("No uploads directory found")
        return details

    # Snapshot and sort the entries before deleting so the report order is
    # deterministic instead of depending on directory-scan order.
    for entry in sorted(root.glob('*')):
        if entry.is_file():
            entry.unlink()
            details.append(f"Deleted file: {entry.name}")

    # Try to remove the directory itself
    if not any(root.iterdir()):
        root.rmdir()
        details.append("Removed empty uploads directory")

    return details
async def perform_cleanup(
    mongodb,
    include_files: bool = True
) -> Dict:
    """
    Perform comprehensive cleanup of all databases

    Runs the ChromaDB cleanup, then the MongoDB cleanup, then (optionally)
    the uploaded-files cleanup. A failing step is logged and recorded in the
    summary; it does not stop the remaining steps from running.

    Args:
        mongodb: MongoDB store instance
        include_files (bool): Whether to also delete uploaded files

    Returns:
        Dict: Cleanup operation summary with the keys "status", "message"
            and "details". "details" maps "chroma_db", "mongodb" and "files"
            to a dict holding that step's "status" ("success", "error", or
            "not_started" for a step that was not run) and its "details"
            list. The top-level "status" is "success" when no attempted step
            failed, "error" when every attempted step failed, and
            "partial_success" otherwise.
    """
    cleanup_summary = {
        "chroma_db": {"status": "not_started", "details": []},
        "mongodb": {"status": "not_started", "details": []},
        "files": {"status": "not_started", "details": []},
    }

    try:
        # (summary key, label used in the error log, coroutine factory)
        steps = [
            ("chroma_db", "ChromaDB", cleanup_chroma),
            ("mongodb", "MongoDB", lambda: cleanup_mongodb(mongodb)),
        ]
        # Clean files if requested
        if include_files:
            steps.append(("files", "files", cleanup_files))

        for key, label, run_step in steps:
            try:
                details = await run_step()
                cleanup_summary[key] = {
                    "status": "success",
                    "details": details,
                }
            except Exception as e:
                # Record the failure and keep going with the other steps.
                logger.error(f"Error cleaning {label}: {str(e)}")
                cleanup_summary[key] = {
                    "status": "error",
                    "details": [str(e)],
                }

        # Determine overall status from the steps that actually ran. A
        # skipped step stays "not_started" and must not mask the case where
        # every attempted step failed.
        attempted = [
            item["status"]
            for item in cleanup_summary.values()
            if item["status"] != "not_started"
        ]
        failures = attempted.count("error")
        if failures == 0:
            overall_status = "success"
        elif failures == len(attempted):
            overall_status = "error"
        else:
            overall_status = "partial_success"

        return {
            "status": overall_status,
            "message": "Cleanup operation completed",
            "details": cleanup_summary,
        }
    except Exception as e:
        logger.error(f"Error in cleanup operation: {str(e)}")
        raise