Spaces:
Sleeping
Sleeping
from data_access import get_pg_sync_connection | |
conn = get_pg_sync_connection() | |
def create_eval_database():
    """Create the evaluation tables on the shared database connection.

    Runs ``CREATE TABLE IF NOT EXISTS`` for ``questions``, ``rankers``,
    ``baseline_sources``, ``source_finders`` and ``source_runs`` through the
    module-level ``conn`` (obtained from ``get_pg_sync_connection()``; the
    DDL uses PostgreSQL-style ``SERIAL`` keys) and commits them together.
    Tables that already exist are left untouched.

    The shared connection is intentionally left open: the loader functions
    in this module reuse ``conn`` after the schema has been created.

    Raises:
        Exception: any error raised by the database driver. The pending
            transaction is rolled back before the error propagates.
    """
    # Order matters: tables referenced by foreign keys are created first.
    ddl_statements = (
        # One row per distinct evaluation question.
        '''
        CREATE TABLE IF NOT EXISTS questions (
            id SERIAL PRIMARY KEY,
            question_text TEXT NOT NULL,
            CONSTRAINT unique_question_text UNIQUE (question_text)
        );
        ''',
        # Who produced a baseline ranking.
        '''
        CREATE TABLE IF NOT EXISTS rankers (
            id SERIAL PRIMARY KEY,
            ranker TEXT NOT NULL
        );
        ''',
        # Baseline sources: at most one row per (question, sugya, ranker).
        '''
        CREATE TABLE IF NOT EXISTS baseline_sources (
            id SERIAL PRIMARY KEY,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            ranker_id INTEGER NOT NULL,
            FOREIGN KEY (question_id) REFERENCES questions(id),
            FOREIGN KEY (ranker_id) REFERENCES rankers(id),
            CONSTRAINT unique_source_per_question_ranker UNIQUE(question_id, sugya_id, ranker_id)
        );
        ''',
        # Available source finder implementations and their versions.
        '''
        CREATE TABLE IF NOT EXISTS source_finders (
            id SERIAL PRIMARY KEY,
            source_finder_type TEXT NOT NULL,
            description TEXT,
            source_finder_version TEXT NOT NULL
        );
        ''',
        # Log of every source returned by each source finder run.
        '''
        CREATE TABLE IF NOT EXISTS source_runs (
            id SERIAL PRIMARY KEY,
            source_finder_id INTEGER NOT NULL,
            run_id TEXT NOT NULL,
            run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            FOREIGN KEY (source_finder_id) REFERENCES source_finders(id),
            FOREIGN KEY (question_id) REFERENCES questions(id)
        );
        ''',
    )

    cursor = conn.cursor()
    try:
        for statement in ddl_statements:
            cursor.execute(statement)
        conn.commit()
    except Exception:
        # Leave the shared connection usable instead of stuck in a failed
        # transaction.
        conn.rollback()
        raise
    finally:
        # Release only the cursor; ``conn`` is shared with the loaders below.
        cursor.close()
def load_source_finders():
    """Seed ``source_finders`` with the built-in finder types.

    Inserts one row each for ``claude_sources``, ``keywords`` and ``lenses``
    with version ``1`` through the module-level ``conn`` and commits them as
    a single transaction.

    Not idempotent: the statement is a plain ``INSERT``, so every call adds
    three more rows.

    Raises:
        Exception: any error raised by the database driver. The pending
            transaction is rolled back before the error propagates.
    """
    finder_types = ("claude_sources", "keywords", "lenses")
    cursor = conn.cursor()
    try:
        for finder_type in finder_types:
            cursor.execute(
                "INSERT INTO source_finders (source_finder_type, source_finder_version) VALUES (%s, 1)",
                (finder_type,),
            )
        conn.commit()
    except Exception:
        # Keep the shared connection usable for later calls.
        conn.rollback()
        raise
    finally:
        cursor.close()
def load_rankers():
    """Seed ``rankers`` with the built-in ranker names.

    Inserts a single ``claude_sources`` row through the module-level ``conn``
    and commits it.

    Not idempotent: the statement is a plain ``INSERT``, so every call adds
    another row.

    Raises:
        Exception: any error raised by the database driver. The pending
            transaction is rolled back before the error propagates.
    """
    ranker_names = ("claude_sources",)
    cursor = conn.cursor()
    try:
        for ranker_name in ranker_names:
            cursor.execute("INSERT INTO rankers (ranker) VALUES (%s)", (ranker_name,))
        conn.commit()
    except Exception:
        # Keep the shared connection usable for later calls.
        conn.rollback()
        raise
    finally:
        cursor.close()
def load_baseline_sources():
    """Copy the first recorded run of source finder 1 into ``baseline_sources``.

    Selects every ``source_runs`` row with ``run_id = '1'`` and
    ``source_finder_id = 1`` and inserts it into ``baseline_sources`` with
    ``ranker_id`` fixed to 1, then commits on the module-level ``conn``.

    ``run_id`` is compared against the string ``'1'`` because the column is
    declared ``TEXT``; comparing it with a bare integer literal is rejected
    by PostgreSQL (no ``text = integer`` operator).

    Rows that would repeat an existing (question_id, sugya_id, ranker_id)
    combination conflict with the table's unique constraint, so re-running
    this against the same data raises.

    Raises:
        Exception: any error raised by the database driver. The pending
            transaction is rolled back before the error propagates.
    """
    cursor = conn.cursor()
    try:
        cursor.execute('''
            INSERT INTO baseline_sources (question_id, tractate, folio, sugya_id, rank, reason, ranker_id)
            SELECT question_id, tractate, folio, sugya_id, rank, reason, 1
            FROM source_runs
            WHERE run_id = '1' AND source_finder_id = 1
        ''')
        conn.commit()
    except Exception:
        # Keep the shared connection usable for later calls.
        conn.rollback()
        raise
    finally:
        cursor.close()
if __name__ == "__main__":
    # Schema creation is switched off for this run; re-enable the call below
    # to create the tables before loading.
    # create_eval_database()
    load_baseline_sources()