File size: 3,164 Bytes
6e35819
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
from data_access import get_pg_sync_connection

# Module-level database connection, opened as a side effect of importing this
# module and shared by every function below (each one calls conn.cursor()).
conn = get_pg_sync_connection()



def create_eval_database():
    """Create the evaluation tables in the PostgreSQL database.

    Runs ``CREATE TABLE IF NOT EXISTS`` for ``questions``, ``rankers``,
    ``baseline_sources``, ``source_finders`` and ``source_runs`` on the
    module-level ``conn`` and then commits. Tables that already exist are
    left as they are.

    The shared connection is intentionally left open: the loader functions
    in this module use the same ``conn`` object, so closing it here would
    make any later call in the same process fail.

    Raises:
        Exception: Whatever the database driver raises for a failed
            statement; the transaction is rolled back before re-raising.
    """
    # Order matters: referenced tables (questions, rankers, source_finders)
    # are created before the tables holding foreign keys to them.
    ddl_statements = (
        # Evaluation questions; the text itself must be unique.
        '''
        CREATE TABLE IF NOT EXISTS questions (
            id SERIAL PRIMARY KEY,
            question_text TEXT NOT NULL,
            CONSTRAINT unique_question_text UNIQUE (question_text)
        );
        ''',
        # Who or what produced a baseline ranking.
        '''
        CREATE TABLE IF NOT EXISTS rankers (
            id SERIAL PRIMARY KEY,
            ranker TEXT NOT NULL
        );
        ''',
        # Baseline sources: at most one row per (question, sugya, ranker).
        '''
        CREATE TABLE IF NOT EXISTS baseline_sources (
            id SERIAL PRIMARY KEY,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            ranker_id INTEGER NOT NULL,
            FOREIGN KEY (question_id) REFERENCES questions(id),
            FOREIGN KEY (ranker_id) REFERENCES rankers(id),
            CONSTRAINT unique_source_per_question_ranker UNIQUE(question_id, sugya_id, ranker_id)
        );
        ''',
        # Registered source finder implementations and their versions.
        '''
        CREATE TABLE IF NOT EXISTS source_finders (
            id SERIAL PRIMARY KEY,
            source_finder_type TEXT NOT NULL,
            description TEXT,
            source_finder_version TEXT NOT NULL
        );
        ''',
        # Log of every source returned by each source finder run.
        '''
        CREATE TABLE IF NOT EXISTS source_runs (
            id SERIAL PRIMARY KEY,
            source_finder_id INTEGER NOT NULL,
            run_id TEXT NOT NULL,
            run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            question_id INTEGER NOT NULL,
            tractate TEXT NOT NULL,
            folio TEXT NOT NULL,
            sugya_id TEXT NOT NULL,
            rank INTEGER NOT NULL,
            reason TEXT,
            FOREIGN KEY (source_finder_id) REFERENCES source_finders(id),
            FOREIGN KEY (question_id) REFERENCES questions(id)
        );
        ''',
    )

    cursor = conn.cursor()
    try:
        for statement in ddl_statements:
            cursor.execute(statement)
        conn.commit()
    except Exception:
        # Leave the shared connection usable instead of stuck in a failed
        # transaction, then let the caller see the original error.
        conn.rollback()
        raise
    finally:
        cursor.close()

def load_source_finders():
    """Insert the known source finder types into ``source_finders``.

    Adds one row each for ``claude_sources``, ``keywords`` and ``lenses``
    with version ``1``, then commits on the module-level ``conn``. The
    table declares no uniqueness constraint on these columns, so every call
    inserts three more rows.

    Raises:
        Exception: Whatever the database driver raises; the transaction is
            rolled back before re-raising.
    """
    finder_types = ("claude_sources", "keywords", "lenses")
    cursor = conn.cursor()
    try:
        for finder_type in finder_types:
            cursor.execute(
                "INSERT INTO source_finders (source_finder_type, source_finder_version) VALUES (%s, 1)",
                (finder_type,),
            )
        conn.commit()
    except Exception:
        # Do not leave the shared connection in a failed transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()

def load_rankers():
    """Insert the known rankers into ``rankers``.

    Currently adds a single ``claude_sources`` row and commits on the
    module-level ``conn``. The table declares no uniqueness constraint on
    ``ranker``, so every call inserts another row.

    Raises:
        Exception: Whatever the database driver raises; the transaction is
            rolled back before re-raising.
    """
    ranker_names = ("claude_sources",)
    cursor = conn.cursor()
    try:
        for ranker_name in ranker_names:
            cursor.execute("INSERT INTO rankers (ranker) VALUES (%s)", (ranker_name,))
        conn.commit()
    except Exception:
        # Do not leave the shared connection in a failed transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()

def load_baseline_sources():
    """Copy one run's rows from ``source_runs`` into ``baseline_sources``.

    Selects every ``source_runs`` row with ``run_id`` ``'1'`` and
    ``source_finder_id`` ``1`` and inserts it into ``baseline_sources`` with
    ``ranker_id`` ``1``, then commits on the module-level ``conn``.

    NOTE(review): ids ``1`` presumably refer to the ``claude_sources`` rows
    inserted first by ``load_source_finders`` / ``load_rankers`` -- confirm
    against the actual table contents.

    Given the ``unique_source_per_question_ranker`` constraint declared in
    ``create_eval_database``, running this a second time over the same rows
    is expected to fail with a uniqueness violation rather than duplicate
    them.

    Raises:
        Exception: Whatever the database driver raises; the transaction is
            rolled back before re-raising.
    """
    cursor = conn.cursor()
    try:
        # run_id is declared TEXT, and PostgreSQL has no implicit
        # text = integer comparison, so it is compared against a quoted
        # literal (which would also coerce cleanly to an integer column).
        cursor.execute('''
            INSERT INTO baseline_sources (question_id, tractate, folio, sugya_id, rank, reason, ranker_id)
            SELECT question_id, tractate, folio, sugya_id, rank, reason, 1
            FROM source_runs
            WHERE run_id = '1' AND source_finder_id = 1
        ''')
        conn.commit()
    except Exception:
        # Do not leave the shared connection in a failed transaction.
        conn.rollback()
        raise
    finally:
        cursor.close()



if __name__ == "__main__":
    # Running the file as a script only copies the baseline rows.
    # Schema creation is left disabled here; call create_eval_database()
    # beforehand when the tables do not exist yet.
    load_baseline_sources()