File size: 5,452 Bytes
b953016
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# src/utils/database_cleanup.py
from typing import List, Dict
import chromadb
import shutil
from pathlib import Path
from src.utils.logger import logger
from config.config import settings

async def cleanup_chroma() -> List[str]:
    """
    Clean up ChromaDB vector store

    Opens a persistent client on ``settings.CHROMA_PATH`` with resets
    enabled, deletes every collection, resets the client and finally removes
    the persistence directory from disk.

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any step fails; the message is prefixed with
            "ChromaDB cleanup failed:" and the original error is chained
            as ``__cause__``.
    """
    try:
        # allow_reset=True is required for client.reset() below
        client = chromadb.PersistentClient(
            path=settings.CHROMA_PATH,
            settings=chromadb.Settings(
                allow_reset=True,
                is_persistent=True
            )
        )

        # Depending on the installed chromadb version, list_collections()
        # yields either plain collection names or Collection objects, while
        # delete_collection() always expects the name. The str check comes
        # first so name entries are never probed for a ``.name`` attribute.
        for entry in client.list_collections():
            name = entry if isinstance(entry, str) else entry.name
            client.delete_collection(name)

        # Reset client
        client.reset()

        # Remove persistence directory
        path = Path(settings.CHROMA_PATH)
        if path.exists():
            shutil.rmtree(path)

        return ["All vector store data cleared"]
    except Exception as e:
        # Chain the original error so its traceback is not lost
        raise Exception(f"ChromaDB cleanup failed: {str(e)}") from e
    
async def cleanup_mongodb(mongodb) -> List[str]:
    """
    Clean up MongoDB collections

    Empties each collection with ``delete_many({})``; the collections
    themselves are not dropped. Collections are processed in a fixed order
    and processing stops at the first failure, so earlier collections may
    already have been emptied when an error is raised.

    Args:
        mongodb: MongoDB store instance exposing ``chat_history``,
            ``conversations``, ``documents``, ``knowledge_base`` and ``db``

    Returns:
        List[str]: Details of cleanup operations

    Raises:
        Exception: If any step fails; the message is prefixed with
            "MongoDB cleanup failed:" and the original error is chained
            as ``__cause__``.
    """
    # (attribute on the store, message recorded once it has been emptied)
    steps = (
        ("chat_history", "Cleared chat history"),
        ("conversations", "Cleared conversations"),
        ("documents", "Cleared document metadata"),
        ("knowledge_base", "Cleared knowledge base"),
    )
    details = []

    try:
        for attribute, message in steps:
            # Resolve the attribute lazily so a missing collection only
            # fails after the preceding ones have been processed
            await getattr(mongodb, attribute).delete_many({})
            details.append(message)

        # NOTE(review): on a driver database object attribute access may
        # always succeed, making this check always true - confirm
        if hasattr(mongodb.db, 'vector_metadata'):
            await mongodb.db.vector_metadata.delete_many({})
            details.append("Cleared vector metadata")

        return details
    except Exception as e:
        # Chain the original error so its traceback is not lost
        raise Exception(f"MongoDB cleanup failed: {str(e)}") from e

async def cleanup_files() -> List[str]:
    """
    Remove the regular files found directly inside the ``uploads`` directory

    Entries that are not regular files (for example sub-directories) are
    left in place. The ``uploads`` directory itself is removed only when it
    ends up with no entries at all.

    Returns:
        List[str]: One message per action that was taken
    """
    root = Path("uploads")

    # Nothing to do when the directory is absent
    if not root.exists():
        return ["No uploads directory found"]

    report = []

    # Snapshot the listing first so deletions cannot disturb the iteration
    snapshot = tuple(root.glob('*'))
    for entry in snapshot:
        if not entry.is_file():
            continue
        entry.unlink()
        report.append(f"Deleted file: {entry.name}")

    # rmdir() only works on an empty directory, so check for leftovers
    if next(root.iterdir(), None) is None:
        root.rmdir()
        report.append("Removed empty uploads directory")

    return report

async def _run_cleanup_step(label: str, step) -> Dict:
    """Run one cleanup step and turn its outcome into a summary entry."""
    try:
        details = await step()
    except Exception as e:
        # A failing step must not prevent the remaining steps from running
        logger.error(f"Error cleaning {label}: {str(e)}")
        return {"status": "error", "details": [str(e)]}
    return {"status": "success", "details": details}


async def perform_cleanup(
    mongodb,
    include_files: bool = True
) -> Dict:
    """
    Perform comprehensive cleanup of all databases

    Runs the ChromaDB, MongoDB and (optionally) uploaded-files cleanup steps
    in that order. A failure in one step is logged and recorded in the
    summary; the remaining steps still run.

    Args:
        mongodb: MongoDB store instance
        include_files (bool): Whether to also delete uploaded files

    Returns:
        Dict: Cleanup operation summary with the keys ``status``
        ("success", "partial_success" or "error"), ``message`` and
        ``details``. ``details`` maps "chroma_db", "mongodb" and "files" to
        their own ``status``/``details`` entry; a step that was skipped
        keeps the status "not_started".
    """
    cleanup_summary = {
        "chroma_db": {"status": "not_started", "details": []},
        "mongodb": {"status": "not_started", "details": []},
        "files": {"status": "not_started", "details": []}
    }

    try:
        # Clean ChromaDB
        cleanup_summary["chroma_db"] = await _run_cleanup_step(
            "ChromaDB", cleanup_chroma
        )

        # Clean MongoDB
        cleanup_summary["mongodb"] = await _run_cleanup_step(
            "MongoDB", lambda: cleanup_mongodb(mongodb)
        )

        # Clean files if requested
        if include_files:
            cleanup_summary["files"] = await _run_cleanup_step(
                "files", cleanup_files
            )

        # Determine overall status from the steps that actually ran. A
        # skipped step ("not_started") must not mask a run in which every
        # attempted step failed.
        attempted = [
            item["status"]
            for item in cleanup_summary.values()
            if item["status"] != "not_started"
        ]
        if all(status == "error" for status in attempted):
            overall_status = "error"
        elif any(status == "error" for status in attempted):
            overall_status = "partial_success"
        else:
            overall_status = "success"

        return {
            "status": overall_status,
            "message": "Cleanup operation completed",
            "details": cleanup_summary
        }

    except Exception as e:
        logger.error(f"Error in cleanup operation: {str(e)}")
        raise