Spaces:
Running
Running
File size: 4,088 Bytes
6e35819 312213e 6e35819 312213e 6e35819 312213e 6e35819 312213e 6e35819 312213e 6e35819 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
from data_access import get_pg_sync_connection
# Shared module-level database connection: obtained once when this module is
# imported and reused by every function in this file.
conn = get_pg_sync_connection()
def create_eval_database():
    """Create the evaluation tables if they do not already exist.

    Runs a fixed list of ``CREATE TABLE IF NOT EXISTS`` statements
    (PostgreSQL-style DDL: ``SERIAL`` keys) on the shared module-level
    ``conn`` and commits them as a single transaction. Tables are created in
    dependency order so that every foreign key references a table that
    already exists.

    The shared connection is left open on purpose: the ``load_*`` helpers in
    this module reuse it, and closing it here would make any later call to
    them fail.

    Raises:
        Any exception raised by the database driver is re-raised after the
        pending transaction has been rolled back.
    """
    ddl_statements = (
        # Questions being evaluated; the text itself must be unique.
        """
        CREATE TABLE IF NOT EXISTS questions (
            id SERIAL PRIMARY KEY,
            question_text TEXT NOT NULL,
            CONSTRAINT unique_question_text UNIQUE (question_text)
        );
        """,
        # Rankers that produce baseline rankings.
        """
        CREATE TABLE IF NOT EXISTS rankers (
            id SERIAL PRIMARY KEY,
            ranker TEXT NOT NULL
        );
        """,
        # Baseline sources: one row per (question, sugya, ranker).
        """
        CREATE TABLE IF NOT EXISTS baseline_sources (
            id SERIAL PRIMARY KEY,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            ranker_id INTEGER NOT NULL,
            FOREIGN KEY (question_id) REFERENCES questions(id),
            FOREIGN KEY (ranker_id) REFERENCES rankers(id),
            CONSTRAINT unique_source_per_question_ranker UNIQUE(question_id, sugya_id, ranker_id)
        );
        """,
        # Source finder implementations and their versions.
        """
        CREATE TABLE IF NOT EXISTS source_finders (
            id SERIAL PRIMARY KEY,
            source_finder_type TEXT NOT NULL,
            description TEXT,
            source_finder_version TEXT NOT NULL
        );
        """,
        # Runs of a source finder; (run_id, source_finder_id) is unique.
        """
        CREATE TABLE IF NOT EXISTS source_finder_runs (
            id SERIAL PRIMARY KEY,
            run_id INTEGER NOT NULL,
            source_finder_id INTEGER NOT NULL,
            description TEXT,
            FOREIGN KEY (source_finder_id) REFERENCES source_finders(id),
            CONSTRAINT unique_source_per_run_id UNIQUE(run_id, source_finder_id)
        );
        """,
        # Per-question JSON metadata for a given source finder run.
        """
        CREATE TABLE IF NOT EXISTS source_finder_run_question_metadata (
            id SERIAL PRIMARY KEY,
            question_id INTEGER NOT NULL,
            source_finder_run_id INTEGER NOT NULL,
            metadata JSON,
            FOREIGN KEY (source_finder_run_id) REFERENCES source_finder_runs(id),
            FOREIGN KEY (question_id) REFERENCES questions(id),
            CONSTRAINT unique_question_per_run_id UNIQUE(question_id, source_finder_run_id)
        );
        """,
        # Log of all sources returned by each run.
        """
        CREATE TABLE IF NOT EXISTS source_run_results (
            id SERIAL PRIMARY KEY,
            source_finder_run_id INTEGER NOT NULL,
            run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            FOREIGN KEY (source_finder_run_id) REFERENCES source_finder_runs(id),
            FOREIGN KEY (question_id) REFERENCES questions(id)
        );
        """,
    )

    cursor = conn.cursor()
    try:
        for statement in ddl_statements:
            cursor.execute(statement)
        conn.commit()
    except Exception:
        # Do not leave the shared connection inside a failed transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()
def load_source_finders():
    """Seed the ``source_finders`` table with the known finder types.

    Inserts one row for each of "claude_sources", "keywords" and "lenses",
    with ``source_finder_version`` set to 1, then commits. The INSERT has no
    conflict handling, so every call adds the rows again.

    Raises:
        Any exception raised by the database driver is re-raised after the
        pending transaction has been rolled back.
    """
    finder_types = ("claude_sources", "keywords", "lenses")
    cursor = conn.cursor()
    try:
        for finder_type in finder_types:
            cursor.execute(
                "INSERT INTO source_finders (source_finder_type, source_finder_version) VALUES (%s, 1)",
                (finder_type,),
            )
        conn.commit()
    except Exception:
        # Do not leave the shared connection inside a failed transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()
def load_rankers():
    """Seed the ``rankers`` table with the known rankers.

    Inserts a single "claude_sources" row and commits. The INSERT has no
    conflict handling, so every call adds the row again.

    Raises:
        Any exception raised by the database driver is re-raised after the
        pending transaction has been rolled back.
    """
    ranker_names = ("claude_sources",)
    cursor = conn.cursor()
    try:
        for ranker_name in ranker_names:
            cursor.execute("INSERT INTO rankers (ranker) VALUES (%s)", (ranker_name,))
        conn.commit()
    except Exception:
        # Do not leave the shared connection inside a failed transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()
def load_baseline_sources():
    """Copy run 1 of source finder 1 from ``source_runs`` into ``baseline_sources``.

    Every copied row is stored with ``ranker_id`` 1, and the insert is
    committed as a single transaction.

    NOTE(review): the query reads from ``source_runs`` with ``run_id`` and
    ``source_finder_id`` columns; no table of that name is created by
    ``create_eval_database`` in this file, so it presumably already exists in
    the target database - confirm before running.
    NOTE(review): ids 1 are hard-coded; presumably they are the first rows
    inserted into ``rankers`` and ``source_finders`` - verify.

    Raises:
        Any exception raised by the database driver is re-raised after the
        pending transaction has been rolled back.
    """
    cursor = conn.cursor()
    try:
        cursor.execute('''
            INSERT INTO baseline_sources (question_id, tractate, folio, sugya_id, rank, reason, ranker_id)
            SELECT question_id, tractate, folio, sugya_id, rank, reason, 1
            FROM source_runs
            WHERE run_id = 1 and source_finder_id = 1
        ''')
        conn.commit()
    except Exception:
        # Do not leave the shared connection inside a failed transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()
if __name__ == "__main__":
    # Script entry point: only the schema is built here; copying the
    # baseline sources stays disabled.
    create_eval_database()
    # load_baseline_sources()
|