File size: 4,088 Bytes
6e35819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312213e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e35819
 
312213e
6e35819
312213e
6e35819
 
 
 
 
 
 
312213e
6e35819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312213e
 
6e35819
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from data_access import get_pg_sync_connection

# Shared database connection used by every function in this module.
# NOTE: it is opened as a side effect of importing the module.
conn = get_pg_sync_connection()



def create_eval_database():
    """Create the evaluation tables on the shared connection if they are missing.

    The schema uses PostgreSQL syntax (``SERIAL`` keys), not SQLite. Every
    statement is ``CREATE TABLE IF NOT EXISTS``, so running this against a
    database that already has the tables leaves them untouched. All
    statements run in one transaction that is committed at the end.

    The module-level ``conn`` is deliberately left open: the other loader
    functions in this module reuse it, and closing it here would make them
    fail when called afterwards.

    Raises:
        Exception: any error raised by the database driver is re-raised after
            the pending transaction has been rolled back.
    """
    # Ordered so that each table is created after the tables its foreign
    # keys reference.
    ddl_statements = (
        # Evaluation questions; the text itself must be unique.
        '''
        CREATE TABLE IF NOT EXISTS questions (
            id SERIAL PRIMARY KEY,
            question_text TEXT NOT NULL,
            CONSTRAINT unique_question_text UNIQUE (question_text)
        );
        ''',
        # Who produced a baseline ranking.
        '''
        CREATE TABLE IF NOT EXISTS rankers (
            id SERIAL PRIMARY KEY,
            ranker TEXT NOT NULL
        );
        ''',
        # Baseline sources: one row per (question, sugya, ranker).
        '''
        CREATE TABLE IF NOT EXISTS baseline_sources (
            id SERIAL PRIMARY KEY,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            ranker_id INTEGER NOT NULL,
            FOREIGN KEY (question_id) REFERENCES questions(id),
            FOREIGN KEY (ranker_id) REFERENCES rankers(id),
            CONSTRAINT unique_source_per_question_ranker UNIQUE(question_id, sugya_id, ranker_id)
        );
        ''',
        # Source-finder implementations and their versions.
        '''
        CREATE TABLE IF NOT EXISTS source_finders (
            id SERIAL PRIMARY KEY,
            source_finder_type TEXT NOT NULL,
            description TEXT,
            source_finder_version TEXT NOT NULL
        );
        ''',
        # One row per (run_id, source finder) execution.
        '''
        CREATE TABLE IF NOT EXISTS source_finder_runs (
            id SERIAL PRIMARY KEY,
            run_id INTEGER NOT NULL,
            source_finder_id INTEGER NOT NULL,
            description TEXT,
            FOREIGN KEY (source_finder_id) REFERENCES source_finders(id),
            CONSTRAINT unique_source_per_run_id UNIQUE(run_id, source_finder_id)
        );
        ''',
        # Per-question metadata attached to a source-finder run.
        '''
        CREATE TABLE IF NOT EXISTS source_finder_run_question_metadata (
            id SERIAL PRIMARY KEY,
            question_id INTEGER NOT NULL,
            source_finder_run_id INTEGER NOT NULL,
            metadata JSON,
            FOREIGN KEY (source_finder_run_id) REFERENCES source_finder_runs(id),
            FOREIGN KEY (question_id) REFERENCES questions(id),
            CONSTRAINT unique_question_per_run_id UNIQUE(question_id, source_finder_run_id)
        );
        ''',
        # Log of every source returned by each run.
        '''
        CREATE TABLE IF NOT EXISTS source_run_results (
            id SERIAL PRIMARY KEY,
            source_finder_run_id INTEGER NOT NULL,
            run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            FOREIGN KEY (source_finder_run_id) REFERENCES source_finder_runs(id),
            FOREIGN KEY (question_id) REFERENCES questions(id)
        );
        ''',
    )

    cursor = conn.cursor()
    try:
        for statement in ddl_statements:
            cursor.execute(statement)
        conn.commit()
    except Exception:
        # Leave the shared connection usable instead of stuck in an
        # aborted transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()

def load_source_finders():
    """Insert the three built-in source finder types, each at version 1.

    Uses the module-level ``conn`` and commits after all rows are inserted.
    These are plain INSERTs, so every call adds the rows again.
    """
    insert_sql = "INSERT INTO source_finders (source_finder_type, source_finder_version) VALUES (%s, 1)"
    finder_types = ("claude_sources", "keywords", "lenses")

    db_cursor = conn.cursor()
    for finder_type in finder_types:
        db_cursor.execute(insert_sql, (finder_type,))
    conn.commit()

def load_rankers():
    """Insert the built-in ranker name(s) into ``rankers`` and commit.

    Uses the module-level ``conn``. These are plain INSERTs, so every call
    adds the rows again.
    """
    insert_sql = "INSERT INTO rankers (ranker) VALUES (%s)"
    ranker_names = ("claude_sources",)

    db_cursor = conn.cursor()
    for ranker_name in ranker_names:
        db_cursor.execute(insert_sql, (ranker_name,))
    conn.commit()

def load_baseline_sources():
    """Copy rows from ``source_runs`` into ``baseline_sources`` and commit.

    Only rows with ``run_id = 1`` and ``source_finder_id = 1`` are copied, and
    each copied row is stored with ``ranker_id`` 1.

    NOTE(review): ``create_eval_database`` in this file creates
    ``source_run_results`` (keyed by ``source_finder_run_id``), not
    ``source_runs`` -- confirm that ``source_runs`` still exists in the target
    database before relying on this function.
    """
    copy_sql = '''
        INSERT INTO baseline_sources (question_id, tractate, folio, sugya_id, rank, reason, ranker_id)
        SELECT question_id, tractate, folio, sugya_id, rank, reason, 1
        FROM source_runs
        WHERE run_id = 1 and source_finder_id = 1
    '''
    db_cursor = conn.cursor()
    db_cursor.execute(copy_sql)
    conn.commit()



if __name__ == '__main__':
    # Running this module as a script only creates the schema.
    create_eval_database()
    # Copying the baseline sources is currently disabled:
    # load_baseline_sources()