Spaces:
Running
Running
Enhanced support for Excel files and added an endpoint that provides an optimized vector store and RAG for Excel.
b953016
# src/utils/database_cleanup.py | |
from typing import List, Dict | |
import chromadb | |
import shutil | |
from pathlib import Path | |
from src.utils.logger import logger | |
from config.config import settings | |
async def cleanup_chroma() -> List[str]:
    """
    Clean up ChromaDB vector store

    Deletes every collection known to the persistent client, resets the
    client, and finally removes the persistence directory located at
    ``settings.CHROMA_PATH``.

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any step fails. The message is prefixed with
            "ChromaDB cleanup failed" and the original error is chained
            as ``__cause__`` so its traceback is not lost.
    """
    try:
        # Initialize client with allow_reset=True so client.reset() below
        # is permitted.
        client = chromadb.PersistentClient(
            path=settings.CHROMA_PATH,
            settings=chromadb.Settings(
                allow_reset=True,
                is_persistent=True
            )
        )

        # Depending on the installed chromadb release, list_collections()
        # yields either collection names or Collection objects exposing a
        # ``name`` attribute; delete_collection() wants the name.
        for entry in client.list_collections():
            client.delete_collection(getattr(entry, "name", entry))

        # Reset client
        client.reset()

        # Remove persistence directory
        path = Path(settings.CHROMA_PATH)
        if path.exists():
            shutil.rmtree(path)

        return ["All vector store data cleared"]
    except Exception as e:
        raise Exception(f"ChromaDB cleanup failed: {str(e)}") from e
async def cleanup_mongodb(mongodb) -> List[str]:
    """
    Clean up MongoDB collections

    Calls ``delete_many({})`` on each known collection of the store, in a
    fixed order, recording one message per collection that was cleared.
    The collections themselves are emptied, not dropped.

    Args:
        mongodb: MongoDB store instance exposing ``chat_history``,
            ``conversations``, ``documents``, ``knowledge_base`` and ``db``

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any deletion fails. The message is prefixed with
            "MongoDB cleanup failed" and the original error is chained as
            ``__cause__``. Collections cleared before the failure stay
            cleared.
    """
    # (attribute on the store, message reported once it has been cleared)
    targets = (
        ("chat_history", "Cleared chat history"),
        ("conversations", "Cleared conversations"),
        ("documents", "Cleared document metadata"),
        ("knowledge_base", "Cleared knowledge base"),
    )

    details = []
    try:
        # Attributes are resolved lazily, one at a time, so a missing
        # collection only fails after the earlier ones were processed.
        for attribute, message in targets:
            await getattr(mongodb, attribute).delete_many({})
            details.append(message)

        # vector_metadata is optional and lives on the raw database handle.
        if hasattr(mongodb.db, 'vector_metadata'):
            await mongodb.db.vector_metadata.delete_many({})
            details.append("Cleared vector metadata")

        return details
    except Exception as e:
        raise Exception(f"MongoDB cleanup failed: {str(e)}") from e
async def cleanup_files(uploads_dir: Path = Path("uploads")) -> List[str]:
    """
    Clean up uploaded files

    Deletes the regular files located directly inside the uploads
    directory (subdirectories and their contents are left untouched) and
    removes the directory itself when nothing remains in it.

    Args:
        uploads_dir: Directory to clean; any value accepted by ``Path()``
            works. Defaults to ``uploads`` relative to the current working
            directory.

    Returns:
        List[str]: Details of cleanup operations, with deleted files
        reported in sorted order
    """
    root = Path(uploads_dir)

    # is_dir() rather than exists(): a regular file sitting at this path is
    # not an uploads directory and must not reach iterdir() below.
    if not root.is_dir():
        return ["No uploads directory found"]

    details = []

    # sorted() snapshots the listing before anything is deleted and makes
    # the reported order deterministic.
    for entry in sorted(root.iterdir()):
        if entry.is_file():
            entry.unlink()
            details.append(f"Deleted file: {entry.name}")

    # Try to remove the directory itself; only possible once it is empty.
    if not any(root.iterdir()):
        root.rmdir()
        details.append("Removed empty uploads directory")

    return details
async def _run_cleanup_step(label: str, step) -> Dict:
    """Await one cleanup coroutine and report its outcome as a summary entry."""
    try:
        details = await step
    except Exception as e:
        logger.error(f"Error cleaning {label}: {str(e)}")
        return {"status": "error", "details": [str(e)]}
    return {"status": "success", "details": details}


def _overall_status(cleanup_summary: Dict) -> str:
    """Derive the overall status from the steps that were actually attempted."""
    # Steps left at "not_started" (e.g. files when include_files is False)
    # must not prevent a total failure from being reported as "error".
    attempted = [
        item["status"]
        for item in cleanup_summary.values()
        if item["status"] != "not_started"
    ]
    if attempted and all(status == "error" for status in attempted):
        return "error"
    if "error" in attempted:
        return "partial_success"
    return "success"


async def perform_cleanup(
    mongodb,
    include_files: bool = True
) -> Dict:
    """
    Perform comprehensive cleanup of all databases

    Runs the ChromaDB, MongoDB and (optionally) uploaded-file cleanups one
    after another. A failing step is logged and recorded in the summary
    without preventing the remaining steps from running.

    Args:
        mongodb: MongoDB store instance
        include_files (bool): Whether to also delete uploaded files

    Returns:
        Dict: Cleanup operation summary with keys ``status`` ("success",
        "partial_success" or "error"), ``message`` and ``details``. Each
        entry of ``details`` ("chroma_db", "mongodb", "files") carries its
        own ``status`` and ``details`` list; a skipped step keeps the
        status "not_started".
    """
    cleanup_summary = {
        "chroma_db": {"status": "not_started", "details": []},
        "mongodb": {"status": "not_started", "details": []},
        "files": {"status": "not_started", "details": []}
    }

    try:
        # Clean ChromaDB
        cleanup_summary["chroma_db"] = await _run_cleanup_step(
            "ChromaDB", cleanup_chroma()
        )

        # Clean MongoDB
        cleanup_summary["mongodb"] = await _run_cleanup_step(
            "MongoDB", cleanup_mongodb(mongodb)
        )

        # Clean files if requested
        if include_files:
            cleanup_summary["files"] = await _run_cleanup_step(
                "files", cleanup_files()
            )

        return {
            "status": _overall_status(cleanup_summary),
            "message": "Cleanup operation completed",
            "details": cleanup_summary
        }
    except Exception as e:
        logger.error(f"Error in cleanup operation: {str(e)}")
        raise