"""Schema creation and seed loaders for the source-evaluation PostgreSQL database."""

from contextlib import closing

from data_access import get_pg_sync_connection

# Shared module-level connection used by every function in this module.
# Functions commit on it but never close it; the script entry point does.
conn = get_pg_sync_connection()


def create_eval_database() -> None:
    """Create the evaluation tables if they do not already exist.

    Creates ``questions``, ``rankers``, ``baseline_sources``,
    ``source_finders`` and ``source_runs`` (PostgreSQL DDL) and commits.

    The shared connection is intentionally left open so that the loader
    functions in this module can be called afterwards in the same process.
    """
    with closing(conn.cursor()) as cursor:
        # Questions being evaluated; the text itself must be unique.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS questions (
                id SERIAL PRIMARY KEY,
                question_text TEXT NOT NULL,
                CONSTRAINT unique_question_text UNIQUE (question_text)
            );
        """)

        # Entities that produce the baseline rankings.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS rankers (
                id SERIAL PRIMARY KEY,
                ranker TEXT NOT NULL
            );
        """)

        # Baseline sources: one row per (question, sugya, ranker).
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS baseline_sources (
                id SERIAL PRIMARY KEY,
                question_id INTEGER NOT NULL,
                tractate TEXT NOT NULL,
                folio TEXT NOT NULL,
                sugya_id TEXT NOT NULL,
                rank INTEGER NOT NULL,
                reason TEXT,
                ranker_id INTEGER NOT NULL,
                FOREIGN KEY (question_id) REFERENCES questions(id),
                FOREIGN KEY (ranker_id) REFERENCES rankers(id),
                CONSTRAINT unique_source_per_question_ranker
                    UNIQUE(question_id, sugya_id, ranker_id)
            );
        """)

        # Source-finder implementations and their versions.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS source_finders (
                id SERIAL PRIMARY KEY,
                source_finder_type TEXT NOT NULL,
                description TEXT,
                source_finder_version TEXT NOT NULL
            );
        """)

        # Log of every source returned by each source-finder run.
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS source_runs (
                id SERIAL PRIMARY KEY,
                source_finder_id INTEGER NOT NULL,
                run_id TEXT NOT NULL,
                run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                question_id INTEGER NOT NULL,
                tractate TEXT NOT NULL,
                folio TEXT NOT NULL,
                sugya_id TEXT NOT NULL,
                rank INTEGER NOT NULL,
                reason TEXT,
                FOREIGN KEY (source_finder_id) REFERENCES source_finders(id),
                FOREIGN KEY (question_id) REFERENCES questions(id)
            );
        """)

    conn.commit()


def load_source_finders() -> None:
    """Insert the source-finder rows (version 1) and commit.

    No uniqueness constraint exists on ``source_finders``, so calling this
    more than once inserts duplicate rows.
    """
    with closing(conn.cursor()) as cursor:
        for item in ["claude_sources", "keywords", "lenses"]:
            cursor.execute(
                "INSERT INTO source_finders (source_finder_type, source_finder_version) VALUES (%s, 1)",
                (item,),
            )
    conn.commit()


def load_rankers() -> None:
    """Insert the ranker rows and commit.

    No uniqueness constraint exists on ``rankers``, so calling this more
    than once inserts duplicate rows.
    """
    with closing(conn.cursor()) as cursor:
        for item in ["claude_sources"]:
            cursor.execute("INSERT INTO rankers (ranker) VALUES (%s)", (item,))
    conn.commit()


def load_baseline_sources() -> None:
    """Copy run '1' of source finder 1 from ``source_runs`` into ``baseline_sources``.

    Every copied row is attributed to ranker id 1.
    NOTE(review): ids 1 presumably refer to the "claude_sources" rows
    inserted first by ``load_source_finders`` / ``load_rankers`` -- confirm
    against the actual table contents.

    Rows that collide with ``unique_source_per_question_ranker`` make the
    INSERT fail, so this is not safe to re-run on already-loaded data.
    """
    with closing(conn.cursor()) as cursor:
        # run_id is a TEXT column: compare against a string literal, since
        # PostgreSQL has no implicit text = integer comparison.
        cursor.execute("""
            INSERT INTO baseline_sources
                (question_id, tractate, folio, sugya_id, rank, reason, ranker_id)
            SELECT question_id, tractate, folio, sugya_id, rank, reason, 1
            FROM source_runs
            WHERE run_id = '1' and source_finder_id = 1
        """)
    conn.commit()


if __name__ == '__main__':
    try:
        # Create the database
        # create_eval_database()
        load_baseline_sources()
    finally:
        # Release the shared connection once, at the end of the script run.
        conn.close()