File size: 4,638 Bytes
7cc4018
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#!/usr/bin/env python
"""
Migration script to move data from SQLite to MongoDB.
Run this once to migrate existing data to your new MongoDB database.
"""

import os
import sqlite3
import json
from datetime import datetime
from pymongo import MongoClient
from dotenv import load_dotenv
import logging

# Root logging setup: emit INFO-and-above records through a single stream
# handler, prefixing each record with its timestamp and level name.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)

# Logger named after this module; used for the script's progress messages.
logger = logging.getLogger(__name__)

def _load_json_field(raw_value):
    """Decode a JSON-encoded column value, returning ``[]`` if it cannot be decoded."""
    try:
        return json.loads(raw_value)
    except (TypeError, ValueError):
        # TypeError: value is NULL / not str-or-bytes.
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass).
        return []


def _row_to_document(row):
    """Convert one SQLite ``feedback`` row into a dict ready for MongoDB insertion."""
    document = dict(row)

    # These columns are stored as JSON text in SQLite; a missing or
    # undecodable value becomes an empty list.
    for field in ("paraphrased_queries", "paraphrased_responses", "conflicting_facts"):
        document[field] = _load_json_field(document.get(field))

    # Convert integer to boolean
    document["hallucination_detected"] = bool(document["hallucination_detected"])

    # Parse timestamp; fall back to the current time, but say so instead of
    # silently rewriting the record's history.
    raw_timestamp = document.get("timestamp")
    try:
        document["timestamp"] = datetime.strptime(raw_timestamp, "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        logger.warning(
            "Could not parse timestamp %r; using the current time instead",
            raw_timestamp,
        )
        document["timestamp"] = datetime.now()

    # Remove sqlite id so it is not carried over into the MongoDB document.
    document.pop("id", None)
    return document


def migrate_sqlite_to_mongodb():
    """Migrate data from SQLite to MongoDB.

    Reads the connection string from the ``MONGODB_URI`` environment variable
    (after loading a ``.env`` file), reads every row of the ``feedback`` table
    from ``data/feedback.db`` next to this script, converts each row and
    inserts the results into the ``feedback`` collection of the
    ``hallucination_detector`` database.

    Returns:
        bool: ``True`` if the migration completed or there was nothing to
        migrate (SQLite file missing or table empty); ``False`` if
        ``MONGODB_URI`` is not set or any error occurred (the error is
        logged, not raised).
    """

    # Load environment variables
    load_dotenv()

    # Get MongoDB connection string from environment variable
    mongo_uri = os.environ.get("MONGODB_URI")

    if not mongo_uri:
        logger.error("MONGODB_URI not found in environment variables. Please set it before running this script.")
        return False

    # Tracked outside the try so the finally clause can release whatever was
    # actually opened, on every exit path.
    mongo_client = None
    conn = None

    try:
        # Connect to MongoDB
        logger.info("Connecting to MongoDB...")
        mongo_client = MongoClient(mongo_uri)

        # Access database and collection
        db = mongo_client["hallucination_detector"]
        feedback_collection = db["feedback"]

        # Check for existing data
        existing_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB already contains {existing_count} documents")
        if existing_count:
            # The SQLite id is dropped before insertion, so nothing prevents
            # the same rows from being inserted again on a second run.
            logger.warning(
                "Target collection is not empty; if this migration was already run, "
                "the same records will be inserted again"
            )

        # Determine SQLite database path
        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
        db_path = os.path.join(data_dir, "feedback.db")

        if not os.path.exists(db_path):
            logger.warning(f"SQLite database not found at {db_path}. No data to migrate.")
            return True

        # Connect to SQLite
        logger.info(f"Connecting to SQLite database at {db_path}...")
        conn = sqlite3.connect(db_path)
        conn.row_factory = sqlite3.Row  # This enables column access by name
        cursor = conn.cursor()

        # Get all records
        cursor.execute("SELECT * FROM feedback")
        rows = cursor.fetchall()

        if not rows:
            logger.info("No data found in SQLite database.")
            return True

        logger.info(f"Found {len(rows)} records in SQLite database")

        # Process rows and insert into MongoDB (rows is non-empty here, so
        # the document list is non-empty too).
        mongo_docs = [_row_to_document(row) for row in rows]

        logger.info(f"Inserting {len(mongo_docs)} documents into MongoDB...")
        result = feedback_collection.insert_many(mongo_docs)
        logger.info(f"Successfully migrated {len(result.inserted_ids)} records to MongoDB")

        # Verify data in MongoDB
        new_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB now contains {new_count} documents")

        return True

    except Exception as e:
        # Top-level boundary for the script: log with traceback and report
        # failure through the return value.
        logger.error(f"Error during migration: {str(e)}", exc_info=True)
        return False

    finally:
        # Release both connections regardless of how the try block exited.
        if conn is not None:
            conn.close()
        if mongo_client is not None:
            mongo_client.close()

if __name__ == "__main__":
    # Script entry point: run the migration once and report the outcome.
    logger.info("Starting migration from SQLite to MongoDB")
    success = migrate_sqlite_to_mongodb()
    if not success:
        logger.error("Migration failed")
        # Exit with a non-zero status so shells, CI jobs and deploy scripts
        # can detect the failure instead of seeing a successful exit.
        raise SystemExit(1)
    logger.info("Migration completed successfully")