File size: 8,795 Bytes
a23bdc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import asyncio
from typing import Optional
import gradio as gr
import pandas as pd
from data_access import get_pool, get_async_connection, close_pool

# Module-level caches. They start empty and are filled in by initialize_data().
questions: list = []  # question rows as {"id": ..., "text": ...}
source_finders: list = []  # source finder rows as {"id": ..., "name": ...}
questions_dict: dict = {}  # question id -> question text
source_finders_dict: dict = {}  # source finder id -> source finder type name
question_options: list = []  # question dropdown labels formatted as "<id>: <text>"
finder_options: list = []  # source finder ids rendered as strings
# Placeholder mapping; initialize_data() rebinds this to {str(finder id): finder name}.
finder_labels: dict = {"All": "All Source Finders"}


async def get_questions():
    """Return every question, ordered by id, as ``{"id", "text"}`` dicts."""
    sql = "SELECT id, question_text FROM questions ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        result = []
        for row in rows:
            result.append({"id": row["id"], "text": row["question_text"]})
        return result


async def get_source_finders():
    """Return every source finder, ordered by id, as ``{"id", "name"}`` dicts.

    The ``name`` value comes from the ``source_finder_type`` column.
    """
    sql = "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        result = []
        for row in rows:
            result.append({"id": row["id"], "name": row["name"]})
        return result


async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
    """Return the distinct run ids recorded for a question, in ascending order.

    The result is narrowed to one source finder unless ``source_finder_id``
    is ``None`` or the literal ``"All"``.
    """
    sql_parts = ["SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"]
    args = [question_id]

    no_finder_filter = source_finder_id == "All" or source_finder_id is None
    if not no_finder_filter:
        sql_parts.append(" AND source_finder_id = $2")
        args.append(source_finder_id)

    sql_parts.append(" ORDER BY run_id")
    sql = "".join(sql_parts)

    async with get_async_connection() as conn:
        rows = await conn.fetch(sql, *args)
        return [row["run_id"] for row in rows]


async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
                          run_id: Optional[int] = None):
    """Fetch the source runs recorded for one question.

    Args:
        question_id: Id of the question whose source runs are wanted.
        source_finder_id: Restrict rows to this source finder. ``None``,
            ``""`` and ``"All"`` mean "no finder filter".
        run_id: Run to return. ``None`` falls back to run 1, which is the
            run this query used to be hard-coded to, so callers that omit
            the argument get the same rows as before.

    Returns:
        A list of dicts, one per matching ``source_runs`` row, each carrying
        an extra ``finder_name`` key taken from
        ``source_finders.source_finder_type``. Rows are ordered by run id,
        then by rank descending.
    """
    default_run_id = 1
    effective_run_id = default_run_id if run_id is None else run_id

    query = """
        SELECT sr.*, sf.source_finder_type as finder_name
        FROM source_runs sr
        JOIN source_finders sf ON sr.source_finder_id = sf.id
        WHERE sr.question_id = $1 AND sr.run_id = $2
    """
    params = [question_id, effective_run_id]

    # Compare against explicit "no filter" markers instead of truthiness so a
    # legitimate finder id of 0 is not silently dropped.
    if source_finder_id not in (None, "", "All"):
        params.append(source_finder_id)
        # Placeholder index is the position of the value just appended.
        query += f" AND sr.source_finder_id = ${len(params)}"

    query += " ORDER BY sr.run_id, sr.rank DESC"

    async with get_async_connection() as conn:
        sources = await conn.fetch(query, *params)
        return [dict(s) for s in sources]


async def initialize_data():
    """Load questions and source finders and rebuild the module-level caches.

    Rebinds ``questions``, ``source_finders``, the two id lookup dicts and
    the dropdown option/label collections derived from them.
    """
    global questions, source_finders, questions_dict, source_finders_dict
    global question_options, finder_options, finder_labels

    questions = await get_questions()
    source_finders = await get_source_finders()

    # Id -> display value lookups
    questions_dict = dict((entry["id"], entry["text"]) for entry in questions)
    source_finders_dict = dict((entry["id"], entry["name"]) for entry in source_finders)

    # Dropdown material: "<id>: <text>" labels and stringified finder ids
    question_options = ["{}: {}".format(entry["id"], entry["text"]) for entry in questions]
    finder_keys = [str(entry["id"]) for entry in source_finders]
    finder_names = [entry["name"] for entry in source_finders]
    finder_options = finder_keys
    finder_labels = dict(zip(finder_keys, finder_names))


def update_source_runs(question_option, source_finder_id):
    """Load source runs for the selected question and build the UI outputs.

    Args:
        question_option: Question dropdown value formatted as
            ``"<id>: <text>"``; falsy when nothing is selected.
        source_finder_id: Source finder dropdown value (an id as a string).
            ``None``, ``""`` and ``"All"`` all mean "no finder filter".

    Returns:
        Always a 3-tuple, matching the
        ``[results_table, result_text, download_button]`` outputs this
        handler is wired to:

        * the DataFrame to display, or ``None`` to clear the table;
        * a status message;
        * the path of a CSV file holding the full result set, or ``None``.
    """
    import tempfile  # local import keeps this block self-contained

    if not question_option:
        return None, "No question selected", None

    # The numeric id is everything before the first ":" of the label.
    question_id = int(question_option.split(":")[0])

    # An unselected dropdown yields None, so test for the "no filter" markers
    # explicitly rather than calling len() on the value.
    if source_finder_id in (None, "", "All"):
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))

    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show the curated column subset when all of it is present; otherwise
    # fall back to showing every column returned by the query.
    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
    df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df

    # The download button takes a file path, not CSV text, so persist the
    # export. delete=False keeps the file available after the handle closes;
    # the file is not removed by this function.
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="source_runs_",
        suffix=".csv",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as csv_file:
        df.to_csv(csv_file, index=False)

    result_message = f"Found {len(source_runs)} results"

    return df_display, result_message, csv_file.name


def update_run_ids(question_option, source_finder_id):
    """Compute the run-id choices for the selected question and source finder.

    Args:
        question_option: Question dropdown value formatted as
            ``"<id>: <text>"``; falsy when nothing is selected.
        source_finder_id: Source finder dropdown value. ``None``, ``""`` and
            ``"All"`` all mean "no finder filter".

    Returns:
        A 4-tuple ``(run_id_options, None, message, None)``. ``run_id_options``
        is ``["All"]`` followed by the available run ids as strings, or an
        empty list when no question is selected.
        NOTE(review): the two ``None`` slots presumably reset the results
        table and download button of a run-id dropdown wiring that is
        currently disabled - confirm before re-enabling it.
    """
    if not question_option:
        return [], None, "No question selected", None

    # The numeric id is everything before the first ":" of the label.
    question_id = int(question_option.split(":")[0])

    # An unselected dropdown yields None; int(None) would raise, so treat
    # every "no selection" marker the same way as "All".
    if source_finder_id in (None, "", "All"):
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]

    return run_id_options, None, "", None



async def main():
    """Build and launch the "Source Runs Explorer" Gradio app.

    Opens the connection pool, loads the question and source finder caches,
    lays out the UI, wires the dropdown change events to
    ``update_source_runs`` and launches the app. The pool is closed again
    once launching returns or fails.
    """
    import inspect  # local import keeps this block self-contained

    await get_pool()
    try:
        await initialize_data()
        with gr.Blocks(title="Source Runs Explorer") as app:
            gr.Markdown("# Source Runs Explorer")

            with gr.Row():
                with gr.Column(scale=3):
                    # Main content area
                    question_dropdown = gr.Dropdown(
                        choices=question_options,
                        label="Select Question",
                        interactive=True
                    )

                    with gr.Row():
                        source_finder_dropdown = gr.Dropdown(
                            choices=finder_options,
                            label="Source Finder",
                            interactive=True
                        )
                        # NOTE: there is no run-id dropdown at the moment;
                        # update_source_runs only receives question and finder.

                    result_text = gr.Markdown("Select a question to view source runs")

                    results_table = gr.DataFrame(
                        headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                        interactive=False
                    )

                    download_button = gr.DownloadButton(
                        label="Download Results as CSV",
                        interactive=True,
                        visible=True
                    )

                with gr.Column(scale=1):
                    # Sidebar area
                    gr.Markdown("### About")
                    gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                    gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")

                    gr.Markdown("### Statistics")
                    gr.Markdown(f"Total Questions: {len(questions)}")
                    gr.Markdown(f"Source Finders: {len(source_finders)}")

                    gr.Markdown("### Source Finders")
                    for f in source_finders:
                        gr.Markdown(f"**{f['id']}**: {f['name']}")

            # Each dropdown gets exactly one change handler. Registering the
            # same handler twice on one component would run the database
            # query twice for every selection.
            for dropdown in (question_dropdown, source_finder_dropdown):
                dropdown.change(
                    update_source_runs,
                    inputs=[question_dropdown, source_finder_dropdown],
                    outputs=[results_table, result_text, download_button]
                )

        app.queue()
        # NOTE(review): launch() is expected to block until the server stops
        # when run as a script - confirm for the deployment target, because
        # the pool is closed as soon as it returns.
        app.launch()
    finally:
        # Ensure we clean up when done. close_pool's definition is not visible
        # here, so accept both a plain function and a coroutine function.
        closing = close_pool()
        if inspect.isawaitable(closing):
            await closing

# Script entry point: when executed directly, run the async main() on a new event loop.
if __name__ == "__main__":
    asyncio.run(main())