|
|
|
""" |
|
Migration script to move data from SQLite to MongoDB. |
|
Run this once to migrate existing data to your new MongoDB database. |
|
""" |
|
|
|
import os |
|
import sqlite3 |
|
import json |
|
from datetime import datetime |
|
from pymongo import MongoClient |
|
from dotenv import load_dotenv |
|
import logging |
|
|
|
|
|
# Configure the root logger: INFO and above, timestamped, sent to a stream
# handler (stderr by default).
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
)

# Module-level logger used by everything in this script.
logger = logging.getLogger(__name__)
|
|
|
def _decode_json_column(row_dict, column):
    """Return the JSON-decoded value of ``column``, or ``[]`` if it cannot be decoded."""
    try:
        return json.loads(row_dict[column])
    except (KeyError, TypeError, ValueError):
        # KeyError: the column is absent from the row.
        # TypeError: the stored value is NULL or otherwise not str/bytes.
        # ValueError: malformed JSON (json.JSONDecodeError is a subclass).
        return []


def _parse_timestamp(raw_value):
    """Parse a ``YYYY-MM-DD HH:MM:SS`` string; return ``None`` if it does not match."""
    try:
        return datetime.strptime(raw_value, "%Y-%m-%d %H:%M:%S")
    except (TypeError, ValueError):
        # TypeError: value is not a string (e.g. NULL); ValueError: wrong format.
        return None


def _row_to_document(row):
    """Convert one SQLite ``feedback`` row into a MongoDB document.

    Returns:
        A ``(document, timestamp_replaced)`` tuple. ``timestamp_replaced`` is
        True when the stored timestamp could not be parsed and the current
        time was substituted.
    """
    document = dict(row)

    # These columns hold JSON-encoded text in SQLite; store the decoded
    # values in MongoDB instead.
    for column in ("paraphrased_queries", "paraphrased_responses", "conflicting_facts"):
        document[column] = _decode_json_column(document, column)

    # Normalise the stored flag to a real boolean.
    document["hallucination_detected"] = bool(document["hallucination_detected"])

    parsed_timestamp = _parse_timestamp(document.get("timestamp"))
    timestamp_replaced = parsed_timestamp is None
    document["timestamp"] = datetime.now() if timestamp_replaced else parsed_timestamp

    # Drop the SQLite "id" column so it is not copied into the document.
    document.pop("id", None)

    return document, timestamp_replaced


def migrate_sqlite_to_mongodb():
    """Migrate data from SQLite to MongoDB.

    Reads the connection string from the ``MONGODB_URI`` environment variable
    (after loading a ``.env`` file), copies every row of the ``feedback``
    table in ``data/feedback.db`` (relative to this script) into the
    ``feedback`` collection of the ``hallucination_detector`` database.

    Rows are inserted unconditionally: running the migration twice inserts
    the same records twice.

    Returns:
        bool: True when the migration succeeded or there was nothing to
        migrate (missing SQLite file or empty table); False when
        ``MONGODB_URI`` is unset or any error occurred (the error is logged
        with its traceback).
    """
    load_dotenv()

    mongo_uri = os.environ.get("MONGODB_URI")
    if not mongo_uri:
        logger.error("MONGODB_URI not found in environment variables. Please set it before running this script.")
        return False

    # Tracked outside the try block so the finally clause can release
    # whichever connections were actually opened, on every exit path.
    mongo_client = None
    sqlite_conn = None

    try:
        logger.info("Connecting to MongoDB...")
        mongo_client = MongoClient(mongo_uri)

        db = mongo_client["hallucination_detector"]
        feedback_collection = db["feedback"]

        existing_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB already contains {existing_count} documents")

        data_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
        db_path = os.path.join(data_dir, "feedback.db")

        if not os.path.exists(db_path):
            logger.warning(f"SQLite database not found at {db_path}. No data to migrate.")
            return True

        logger.info(f"Connecting to SQLite database at {db_path}...")
        sqlite_conn = sqlite3.connect(db_path)
        # sqlite3.Row lets each row be converted to a dict keyed by column name.
        sqlite_conn.row_factory = sqlite3.Row

        rows = sqlite_conn.execute("SELECT * FROM feedback").fetchall()

        if not rows:
            logger.info("No data found in SQLite database.")
            return True

        logger.info(f"Found {len(rows)} records in SQLite database")

        mongo_docs = []
        replaced_timestamps = 0
        for row in rows:
            document, timestamp_replaced = _row_to_document(row)
            mongo_docs.append(document)
            if timestamp_replaced:
                replaced_timestamps += 1

        if replaced_timestamps:
            # Surface the substitution instead of silently rewriting data.
            logger.warning(
                f"{replaced_timestamps} record(s) had a missing or unparseable timestamp; "
                "the current time was used instead"
            )

        logger.info(f"Inserting {len(mongo_docs)} documents into MongoDB...")
        result = feedback_collection.insert_many(mongo_docs)
        logger.info(f"Successfully migrated {len(result.inserted_ids)} records to MongoDB")

        new_count = feedback_collection.count_documents({})
        logger.info(f"MongoDB now contains {new_count} documents")

        return True

    except Exception as e:
        # Top-level boundary of the script: log with traceback and report failure.
        logger.error(f"Error during migration: {str(e)}", exc_info=True)
        return False

    finally:
        if sqlite_conn is not None:
            sqlite_conn.close()
        if mongo_client is not None:
            mongo_client.close()
|
|
|
if __name__ == "__main__":
    # Script entry point: run the migration and report the outcome both in
    # the log and through the process exit status.
    logger.info("Starting migration from SQLite to MongoDB")

    success = migrate_sqlite_to_mongodb()

    if success:
        logger.info("Migration completed successfully")
    else:
        logger.error("Migration failed")
        # Exit non-zero so shells, CI jobs and deploy scripts can detect the
        # failure; previously the script always exited with status 0.
        raise SystemExit(1)