#!/usr/bin/env python
"""
Migration script to move data from SQLite to MongoDB.

Run this once to migrate existing data to your new MongoDB database.
The script reads every row of the ``feedback`` table in ``data/feedback.db``
(next to this file) and inserts the converted rows into the ``feedback``
collection of the ``hallucination_detector`` MongoDB database.
"""

import json
import logging
import os
import sqlite3
from contextlib import closing
from datetime import datetime

from dotenv import load_dotenv
from pymongo import MongoClient

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Columns stored as JSON text in SQLite that are decoded before insertion.
_JSON_FIELDS = ("paraphrased_queries", "paraphrased_responses", "conflicting_facts")

# Timestamp layout expected in the SQLite ``timestamp`` column.
_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"


def _parse_json_field(raw_value):
    """Decode a JSON-encoded column value, returning ``[]`` when it cannot be decoded."""
    try:
        return json.loads(raw_value)
    except (TypeError, ValueError):
        # TypeError: NULL / non-text value; ValueError: malformed JSON
        # (json.JSONDecodeError is a ValueError subclass).
        return []


def _row_to_document(row):
    """Convert one SQLite ``feedback`` row into a dict ready for MongoDB.

    Args:
        row: A mapping-like row (e.g. ``sqlite3.Row``) from the feedback table.

    Returns:
        dict: The row with JSON columns decoded, ``hallucination_detected``
        converted to ``bool``, ``timestamp`` converted to ``datetime`` and the
        SQLite ``id`` column removed.

    Raises:
        KeyError: If the row has no ``hallucination_detected`` column.
    """
    document = dict(row)

    # A missing or undecodable JSON column falls back to an empty list.
    for field in _JSON_FIELDS:
        document[field] = _parse_json_field(document.get(field))

    # Convert integer to boolean
    document["hallucination_detected"] = bool(document["hallucination_detected"])

    # Parse timestamp; keep the best-effort fallback but make it visible.
    raw_timestamp = document.get("timestamp")
    try:
        document["timestamp"] = datetime.strptime(raw_timestamp, _TIMESTAMP_FORMAT)
    except (TypeError, ValueError):
        logger.warning(
            "Could not parse timestamp %r; using the current time instead",
            raw_timestamp,
        )
        document["timestamp"] = datetime.now()

    # Remove sqlite id so MongoDB assigns its own _id
    document.pop("id", None)

    return document


def _load_sqlite_documents(db_path):
    """Read all feedback rows from the SQLite file and convert them to documents.

    The connection is always closed, even if reading or converting fails.

    Args:
        db_path: Path to the SQLite database file.

    Returns:
        list[dict]: Converted documents; empty when the table has no rows.
    """
    logger.info("Connecting to SQLite database at %s...", db_path)
    with closing(sqlite3.connect(db_path)) as conn:
        conn.row_factory = sqlite3.Row  # This enables column access by name
        rows = conn.execute("SELECT * FROM feedback").fetchall()

        if not rows:
            logger.info("No data found in SQLite database.")
            return []

        logger.info("Found %d records in SQLite database", len(rows))
        return [_row_to_document(row) for row in rows]


def migrate_sqlite_to_mongodb():
    """Migrate data from SQLite to MongoDB.

    Reads the connection string from the ``MONGODB_URI`` environment variable
    (after loading a ``.env`` file, if present).

    Returns:
        bool: ``True`` when the migration finished or there was nothing to
        migrate; ``False`` when ``MONGODB_URI`` is missing or any error
        occurred (the error is logged with its traceback).
    """
    # Load environment variables
    load_dotenv()

    # Get MongoDB connection string from environment variable
    mongo_uri = os.environ.get("MONGODB_URI")
    if not mongo_uri:
        logger.error("MONGODB_URI not found in environment variables. Please set it before running this script.")
        return False

    mongo_client = None
    try:
        # Connect to MongoDB
        logger.info("Connecting to MongoDB...")
        mongo_client = MongoClient(mongo_uri)

        # Access database and collection
        db = mongo_client["hallucination_detector"]
        feedback_collection = db["feedback"]

        # Check for existing data
        existing_count = feedback_collection.count_documents({})
        logger.info("MongoDB already contains %d documents", existing_count)
        if existing_count:
            # The insert below does not de-duplicate against existing documents.
            logger.warning(
                "Target collection is not empty; migrated records will be "
                "added alongside the existing documents"
            )

        # Determine SQLite database path
        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
        db_path = os.path.join(data_dir, "feedback.db")

        if not os.path.exists(db_path):
            logger.warning("SQLite database not found at %s. No data to migrate.", db_path)
            return True

        mongo_docs = _load_sqlite_documents(db_path)
        if not mongo_docs:
            return True

        # Insert all documents
        logger.info("Inserting %d documents into MongoDB...", len(mongo_docs))
        result = feedback_collection.insert_many(mongo_docs)
        logger.info("Successfully migrated %d records to MongoDB", len(result.inserted_ids))

        # Verify data in MongoDB
        new_count = feedback_collection.count_documents({})
        logger.info("MongoDB now contains %d documents", new_count)

        return True

    except Exception as e:
        # Top-level boundary: report the failure to the caller via the return value.
        logger.exception("Error during migration: %s", e)
        return False
    finally:
        if mongo_client is not None:
            mongo_client.close()


if __name__ == "__main__":
    logger.info("Starting migration from SQLite to MongoDB")
    success = migrate_sqlite_to_mongodb()

    if success:
        logger.info("Migration completed successfully")
    else:
        logger.error("Migration failed")
        # Signal the failure to the calling shell / automation.
        raise SystemExit(1)