eval_results / eval_tables.py
davidr70's picture
improvements
6e35819
raw
history blame
3.16 kB
from data_access import get_pg_sync_connection
# Module-level database connection shared by every function in this file.
# NOTE: it is opened as a side effect of importing this module.
# Presumably a PostgreSQL connection, going by the helper's name -- confirm
# against data_access.
conn = get_pg_sync_connection()
def create_eval_database():
    """Create the evaluation tables in PostgreSQL if they are missing.

    Issues ``CREATE TABLE IF NOT EXISTS`` for ``questions``, ``rankers``,
    ``baseline_sources``, ``source_finders`` and ``source_runs`` on the
    module-level ``conn`` and commits once at the end.  Tables that already
    exist are left untouched.

    The shared connection is left open so the ``load_*`` helpers in this
    module can still be called afterwards in the same process; only the
    cursor is closed.  If any statement fails, the transaction is rolled
    back and the exception propagates to the caller.
    """
    cursor = conn.cursor()
    try:
        # Questions being evaluated; the text itself must be unique.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS questions (
                id SERIAL PRIMARY KEY,
                question_text TEXT NOT NULL,
                CONSTRAINT unique_question_text UNIQUE (question_text)
            );
        ''')

        # Rankers that produce baseline rankings.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS rankers (
                id SERIAL PRIMARY KEY,
                ranker TEXT NOT NULL
            );
        ''')

        # Baseline sources: at most one row per (question, sugya, ranker).
        # Must come after questions and rankers, which it references.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS baseline_sources (
                id SERIAL PRIMARY KEY,
                question_id INTEGER NOT NULL,
                tractate TEXT NOT NULL,
                folio TEXT NOT NULL,
                sugya_id TEXT NOT NULL,
                rank INTEGER NOT NULL,
                reason TEXT,
                ranker_id INTEGER NOT NULL,
                FOREIGN KEY (question_id) REFERENCES questions(id),
                FOREIGN KEY (ranker_id) REFERENCES rankers(id),
                CONSTRAINT unique_source_per_question_ranker UNIQUE(question_id, sugya_id, ranker_id)
            );
        ''')

        # Source finder implementations and their versions.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS source_finders (
                id SERIAL PRIMARY KEY,
                source_finder_type TEXT NOT NULL,
                description TEXT,
                source_finder_version TEXT NOT NULL
            );
        ''')

        # Log of every source returned by each run.
        # Must come after source_finders and questions, which it references.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS source_runs (
                id SERIAL PRIMARY KEY,
                source_finder_id INTEGER NOT NULL,
                run_id TEXT NOT NULL,
                run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                question_id INTEGER NOT NULL,
                tractate TEXT NOT NULL,
                folio TEXT NOT NULL,
                sugya_id TEXT NOT NULL,
                rank INTEGER NOT NULL,
                reason TEXT,
                FOREIGN KEY (source_finder_id) REFERENCES source_finders(id),
                FOREIGN KEY (question_id) REFERENCES questions(id)
            );
        ''')

        conn.commit()
    except Exception:
        # Leave the shared connection usable instead of stuck in a failed
        # transaction, then let the caller see the original error.
        conn.rollback()
        raise
    finally:
        # Release only the cursor: `conn` is module-level and shared, so
        # closing it here would break every later call in this process.
        cursor.close()
def load_source_finders():
    """Seed ``source_finders`` with the known finder types.

    Inserts one row each for "claude_sources", "keywords" and "lenses" (in
    that order) using the module-level ``conn``, with the version column
    given as the SQL literal ``1``, then commits.
    """
    insert_sql = "INSERT INTO source_finders (source_finder_type, source_finder_version) VALUES (%s, 1)"
    finder_types = ["claude_sources", "keywords", "lenses"]

    cur = conn.cursor()
    for finder_type in finder_types:
        cur.execute(insert_sql, (finder_type,))
    conn.commit()
def load_rankers():
    """Seed ``rankers`` with the known ranker names.

    Inserts a single "claude_sources" row using the module-level ``conn``
    and commits.
    """
    insert_sql = "INSERT INTO rankers (ranker) VALUES (%s)"
    ranker_names = ["claude_sources"]

    cur = conn.cursor()
    for ranker_name in ranker_names:
        cur.execute(insert_sql, (ranker_name,))
    conn.commit()
def load_baseline_sources():
    """Copy run 1 of source finder 1 from ``source_runs`` into ``baseline_sources``.

    Every ``source_runs`` row with ``run_id = '1'`` and
    ``source_finder_id = 1`` is inserted into ``baseline_sources`` with
    ``ranker_id`` fixed to 1, then the transaction is committed on the
    module-level ``conn``.  Presumably ids 1 are the "claude_sources" rows
    seeded first by ``load_source_finders`` / ``load_rankers`` -- verify
    against the actual table contents.

    ``baseline_sources`` is declared with a unique constraint on
    (question_id, sugya_id, ranker_id), so inserting a row that already
    exists raises; in that case the transaction is rolled back and the
    exception propagates.
    """
    cursor = conn.cursor()
    try:
        # run_id is declared TEXT in source_runs.  PostgreSQL has no
        # `text = integer` operator, so a bare 1 would fail; the quoted
        # literal is resolved to whatever type the column has.
        cursor.execute('''
            INSERT INTO baseline_sources (question_id, tractate, folio, sugya_id, rank, reason, ranker_id)
            SELECT question_id, tractate, folio, sugya_id, rank, reason, 1
            FROM source_runs
            WHERE run_id = '1' AND source_finder_id = 1
        ''')
        conn.commit()
    except Exception:
        # Keep the shared connection out of an aborted transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()
if __name__ == '__main__':
    # Table creation is disabled here; only the baseline copy runs when this
    # file is executed as a script.
    # create_eval_database()
    # Copy rows from source_runs into baseline_sources.
    load_baseline_sources()