File size: 4,638 Bytes
7cc4018 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
#!/usr/bin/env python
"""
Migration script to move data from SQLite to MongoDB.
Run this once to migrate existing data to your new MongoDB database.
"""
import os
import sqlite3
import json
from datetime import datetime
from pymongo import MongoClient
from dotenv import load_dotenv
import logging
# Root logging setup: INFO and above, each record prefixed with a timestamp
# and its level, emitted through a single stream handler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)

# Module-level logger used by everything in this script.
logger = logging.getLogger(__name__)
# Columns stored in SQLite as JSON text that become native values in MongoDB.
_JSON_COLUMNS = ("paraphrased_queries", "paraphrased_responses", "conflicting_facts")

# Timestamp layout expected in the SQLite ``timestamp`` column.
_SQLITE_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def _decode_json_column(raw_value):
    """Decode a JSON-encoded column value, returning [] when it cannot be parsed."""
    try:
        return json.loads(raw_value)
    except (TypeError, ValueError):
        # TypeError: NULL or other non-text value; ValueError: malformed JSON
        # (json.JSONDecodeError is a ValueError subclass).
        return []


def _row_to_document(row_dict):
    """Convert one SQLite feedback row (given as a dict) into a MongoDB document."""
    document = dict(row_dict)

    # A missing column behaves like a NULL one: it ends up as an empty list.
    for column in _JSON_COLUMNS:
        document[column] = _decode_json_column(document.get(column))

    # SQLite stores the flag as an integer; MongoDB gets a real boolean.
    document["hallucination_detected"] = bool(document["hallucination_detected"])

    try:
        document["timestamp"] = datetime.strptime(
            document["timestamp"], _SQLITE_TIMESTAMP_FORMAT
        )
    except (KeyError, TypeError, ValueError):
        # Best-effort fallback kept from the original behavior, but no longer
        # silent: the replaced value is reported so it can be audited.
        logger.warning(
            "Could not parse timestamp %r; using the current time instead",
            document.get("timestamp"),
        )
        document["timestamp"] = datetime.now()

    # Drop the SQLite primary key so MongoDB assigns its own _id.
    document.pop("id", None)
    return document


def _fetch_feedback_rows(db_path):
    """Return every row of the SQLite ``feedback`` table as a list of dicts."""
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row  # enables column access by name
        # Materialize plain dicts before the connection is closed.
        return [dict(row) for row in conn.execute("SELECT * FROM feedback")]
    finally:
        # Always release the database file, even if the query fails.
        conn.close()


def migrate_sqlite_to_mongodb():
    """Migrate data from SQLite to MongoDB.

    Reads every row of the ``feedback`` table in ``data/feedback.db`` (located
    next to this script) and inserts the converted rows into the ``feedback``
    collection of the ``hallucination_detector`` database. The MongoDB
    connection string is read from the ``MONGODB_URI`` environment variable
    after loading a ``.env`` file.

    Existing MongoDB documents are only counted, never removed or compared, so
    every run inserts all SQLite rows again.

    Returns:
        True when the rows were inserted, or when there was nothing to migrate
        (SQLite file missing or table empty). False when ``MONGODB_URI`` is
        not set or any exception occurred during the migration.
    """
    # Load environment variables
    load_dotenv()

    # Get MongoDB connection string from environment variable
    mongo_uri = os.environ.get("MONGODB_URI")
    if not mongo_uri:
        logger.error("MONGODB_URI not found in environment variables. Please set it before running this script.")
        return False

    mongo_client = None
    try:
        # Connect to MongoDB
        logger.info("Connecting to MongoDB...")
        mongo_client = MongoClient(mongo_uri)

        # Access database and collection
        feedback_collection = mongo_client["hallucination_detector"]["feedback"]

        # Check for existing data
        existing_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB already contains {existing_count} documents")

        # Determine SQLite database path
        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
        db_path = os.path.join(data_dir, "feedback.db")
        if not os.path.exists(db_path):
            logger.warning(f"SQLite database not found at {db_path}. No data to migrate.")
            return True

        # Read all records; the SQLite connection is closed inside the helper.
        logger.info(f"Connecting to SQLite database at {db_path}...")
        rows = _fetch_feedback_rows(db_path)
        if not rows:
            logger.info("No data found in SQLite database.")
            return True
        logger.info(f"Found {len(rows)} records in SQLite database")

        # Convert rows and insert them in a single batch
        mongo_docs = [_row_to_document(row) for row in rows]
        logger.info(f"Inserting {len(mongo_docs)} documents into MongoDB...")
        result = feedback_collection.insert_many(mongo_docs)
        logger.info(f"Successfully migrated {len(result.inserted_ids)} records to MongoDB")

        # Verify data in MongoDB
        new_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB now contains {new_count} documents")
        return True
    except Exception as e:
        # Top-level boundary of the script: report with traceback and signal failure.
        logger.error(f"Error during migration: {str(e)}", exc_info=True)
        return False
    finally:
        # Release the MongoDB connection on every exit path.
        if mongo_client is not None:
            mongo_client.close()
if __name__ == "__main__":
    # Script entry point: run the migration once and report the outcome.
    logger.info("Starting migration from SQLite to MongoDB")
    success = migrate_sqlite_to_mongodb()
    if success:
        logger.info("Migration completed successfully")
    else:
        logger.error("Migration failed")
        # Exit with a non-zero status so shells and CI can detect the failure.
        raise SystemExit(1)