davidr70 committed on
Commit
a23bdc6
·
1 Parent(s): 3ed14b6

initial commit

Browse files
Files changed (4) hide show
  1. .gitignore +3 -0
  2. app.py +245 -0
  3. data_access.py +55 -0
  4. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ .idea/*
3
+ *.iml
app.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from typing import Optional
3
+ import gradio as gr
4
+ import pandas as pd
5
+ from data_access import get_pool, get_async_connection, close_pool
6
+
7
+ # Initialize data at the module level
8
# Module-level lookup data shared by the UI. These start out empty and are
# rebound by initialize_data() once the database has been queried.
questions, source_finders = [], []
questions_dict, source_finders_dict = {}, {}
question_options, finder_options = [], []
finder_labels = {"All": "All Source Finders"}
15
+
16
+
17
+ # Get all questions
18
async def get_questions():
    """Fetch every row of the ``questions`` table, ordered by id.

    Returns:
        A list of ``{"id": ..., "text": ...}`` dicts, one per question.
    """
    sql = "SELECT id, question_text FROM questions ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)

    result = []
    for row in rows:
        result.append({"id": row["id"], "text": row["question_text"]})
    return result
22
+
23
+
24
+ # Get distinct source finders
25
async def get_source_finders():
    """Fetch every row of the ``source_finders`` table, ordered by id.

    Returns:
        A list of ``{"id": ..., "name": ...}`` dicts, where ``name`` is the
        row's ``source_finder_type`` column.
    """
    sql = "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)

    result = []
    for row in rows:
        result.append({"id": row["id"], "name": row["name"]})
    return result
29
+
30
+
31
+ # Get distinct run IDs for a question
32
async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
    """Return the distinct run ids recorded for a question, in ascending order.

    Args:
        question_id: Question whose runs are listed.
        source_finder_id: When given (and not the string ``"All"``), only
            runs produced by this source finder are considered.

    Returns:
        A list of ``run_id`` values.
    """
    sql_parts = ["SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"]
    args = [question_id]

    unfiltered = source_finder_id is None or source_finder_id == "All"
    if not unfiltered:
        sql_parts.append(" AND source_finder_id = $2")
        args.append(source_finder_id)

    sql_parts.append(" ORDER BY run_id")
    sql = "".join(sql_parts)

    async with get_async_connection() as conn:
        rows = await conn.fetch(sql, *args)
    return [row["run_id"] for row in rows]
45
+
46
+
47
+ # Get source runs for a specific question with filters
48
async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
                          run_id: Optional[int] = None):
    """Fetch the source-run rows for one question, joined with the finder name.

    Args:
        question_id: Question whose source runs are returned.
        source_finder_id: When not ``None``, only rows produced by this
            source finder are returned.
        run_id: Run to return. When omitted, run 1 is used, which is the run
            this query was previously hard-wired to, so existing callers see
            no change.

    Returns:
        A list of dicts, one per row: every ``source_runs`` column plus
        ``finder_name`` (the finder's ``source_finder_type``), ordered by
        ``run_id`` and then by ``rank`` descending.
    """
    # $1 and $2 are always present; optional filters are numbered after them.
    params = [question_id, 1 if run_id is None else run_id]
    query = """
        SELECT sr.*, sf.source_finder_type as finder_name
        FROM source_runs sr
        JOIN source_finders sf ON sr.source_finder_id = sf.id
        WHERE sr.question_id = $1 AND sr.run_id = $2
    """

    # Compare against None rather than truthiness so an id of 0 still filters.
    if source_finder_id is not None:
        params.append(source_finder_id)
        query += f" AND sr.source_finder_id = ${len(params)}"

    query += " ORDER BY sr.run_id, sr.rank DESC"

    async with get_async_connection() as conn:
        sources = await conn.fetch(query, *params)
    return [dict(s) for s in sources]
75
+
76
+
77
+ # Initialize data in a single async function
78
async def initialize_data():
    """Load questions and source finders and rebuild the module-level lookups.

    Rebinds ``questions``, ``source_finders``, the two id-to-text dicts and
    the dropdown option lists. ``finder_labels`` is replaced wholesale, so it
    afterwards contains only the finder ids returned by the database.
    """
    global questions, source_finders, questions_dict, source_finders_dict
    global question_options, finder_options, finder_labels

    questions = await get_questions()
    source_finders = await get_source_finders()

    # Questions: id -> text lookup and "<id>: <text>" dropdown entries.
    text_by_id = {}
    dropdown_entries = []
    for question in questions:
        text_by_id[question["id"]] = question["text"]
        dropdown_entries.append(f"{question['id']}: {question['text']}")
    questions_dict = text_by_id
    question_options = dropdown_entries

    # Source finders: id -> name lookup, plus string ids for the dropdown.
    name_by_id = {}
    id_strings = []
    label_by_id_string = {}
    for finder in source_finders:
        name_by_id[finder["id"]] = finder["name"]
        id_strings.append(str(finder["id"]))
        label_by_id_string[str(finder["id"])] = finder["name"]
    source_finders_dict = name_by_id
    finder_options = id_strings
    finder_labels = label_by_id_string
92
+
93
+
94
+ # Main function to handle UI interactions
95
def update_source_runs(question_option, source_finder_id):
    """Gradio handler: load the source runs for the current dropdown selection.

    Args:
        question_option: Selected question in ``"<id>: <text>"`` form, or a
            falsy value when nothing is selected.
        source_finder_id: Selected source finder id as a string. ``None``,
            ``""`` and ``"All"`` all mean "do not filter by finder".

    Returns:
        Always a 3-tuple matching the handler's wired outputs
        ``(results_table, result_text, download_button)``:
        the DataFrame to show (or ``None``), a status message, and the path
        of a CSV file holding the full result (or ``None``).
    """
    import tempfile

    if not question_option:
        return None, "No question selected", None

    # Extract question ID from the "<id>: <text>" selection
    question_id = int(question_option.split(":")[0])

    # An unset dropdown delivers None, so test for "no filter" before int().
    if source_finder_id in (None, "", "All"):
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    # Run the async query to completion from this synchronous handler.
    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))

    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show the curated columns only when all of them are present; otherwise
    # fall back to the raw frame.
    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
    df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df

    # The download button takes a file path, not file contents, so the CSV is
    # written to a temporary file. delete=False keeps it on disk after the
    # handle closes; nothing in this function removes it later.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix=f"source_runs_q{question_id}_",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as csv_file:
        df.to_csv(csv_file, index=False)
        csv_path = csv_file.name

    result_message = f"Found {len(source_runs)} results"

    return df_display, result_message, csv_path
133
+
134
+
135
+ # Function to update run_id dropdown when question or source_finder changes
136
def update_run_ids(question_option, source_finder_id):
    """Compute the run-id dropdown options for the current selection.

    Args:
        question_option: Selected question in ``"<id>: <text>"`` form, or a
            falsy value when nothing is selected.
        source_finder_id: Selected source finder id as a string. ``None``,
            ``""`` and ``"All"`` all mean "do not filter by finder".

    Returns:
        A 4-tuple ``(run_id_options, None, message, None)``. ``run_id_options``
        is ``["All"]`` followed by the available run ids as strings, or an
        empty list when no question is selected.
    """
    if not question_option:
        return [], None, "No question selected", None

    # Extract question ID from the "<id>: <text>" selection
    question_id = int(question_option.split(":")[0])

    # Treat an unset or empty selection like "All" instead of crashing in int().
    if source_finder_id in (None, "", "All"):
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    # Run the async query to completion from this synchronous function.
    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]

    return run_id_options, None, "", None
151
+
152
+
153
+
154
+ # Create Gradio app
155
+
156
+ # Ensure we clean up when done
157
async def main():
    """Open the connection pool, load lookup data, then build and launch the UI.

    The pool is closed again when this function exits, whether it returns
    normally or raises.
    """
    await get_pool()
    try:
        await initialize_data()

        with gr.Blocks(title="Source Runs Explorer") as app:
            gr.Markdown("# Source Runs Explorer")

            with gr.Row():
                with gr.Column(scale=3):
                    # Main content area
                    question_dropdown = gr.Dropdown(
                        choices=question_options,
                        label="Select Question",
                        interactive=True
                    )

                    with gr.Row():
                        source_finder_dropdown = gr.Dropdown(
                            choices=finder_options,
                            label="Source Finder",
                            interactive=True
                        )
                        # NOTE: there is no run-id dropdown at the moment.

                    result_text = gr.Markdown("Select a question to view source runs")

                    results_table = gr.DataFrame(
                        headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                        interactive=False
                    )

                    download_button = gr.DownloadButton(
                        label="Download Results as CSV",
                        interactive=True,
                        visible=True
                    )

                with gr.Column(scale=1):
                    # Sidebar area
                    gr.Markdown("### About")
                    gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                    gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")

                    gr.Markdown("### Statistics")
                    gr.Markdown(f"Total Questions: {len(questions)}")
                    gr.Markdown(f"Source Finders: {len(source_finders)}")

                    gr.Markdown("### Source Finders")
                    for f in source_finders:
                        gr.Markdown(f"**{f['id']}**: {f['name']}")

            # Both dropdowns refresh the same outputs through the same handler.
            # Each is registered exactly once so a single selection change
            # triggers a single database query.
            for dropdown in (question_dropdown, source_finder_dropdown):
                dropdown.change(
                    update_source_runs,
                    inputs=[question_dropdown, source_finder_dropdown],
                    outputs=[results_table, result_text, download_button]
                )

        app.queue()
        app.launch()
    finally:
        # Best-effort cleanup: a failure to close the pool is reported but
        # must not mask whatever ended the application.
        # NOTE(review): handlers call asyncio.run() themselves, so the pool
        # may by now belong to a different, finished event loop - confirm.
        try:
            await close_pool()
        except Exception as exc:
            print(f"Warning: could not close the connection pool cleanly: {exc}")
243
+
244
+ if __name__ == "__main__":
245
+ asyncio.run(main())
data_access.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import os
3
+ from contextlib import asynccontextmanager
4
+
5
+ import asyncpg
6
+ from dotenv import load_dotenv
7
+
8
+ # Global connection pool
9
+ _pool = None
10
+ load_dotenv()
11
+
12
+
13
async def get_pool(schema="talmudexplore", min_size=2, max_size=5):
    """Return the shared asyncpg pool, creating or re-creating it if needed.

    A pool that was created on a different event loop than the one currently
    running is closed (best effort) and replaced.

    Args:
        schema: Schema placed on the connection's ``search_path``. It is
            interpolated directly into a ``SET`` statement, so it must come
            from trusted code, never from user input.
        min_size: Minimum number of pooled connections.
        max_size: Maximum number of pooled connections.

    Returns:
        The module-level pool. The arguments only take effect when a new pool
        is created; an existing pool on the current loop is returned as is.
    """
    global _pool

    if _pool is not None and getattr(_pool, '_loop', None) is not asyncio.get_running_loop():
        # Drop the global reference first so a failed close can never leave
        # the stale pool reachable.
        stale_pool, _pool = _pool, None
        try:
            await stale_pool.close()
        except Exception:
            # Closing is best effort, but only ordinary errors are swallowed:
            # cancellation and KeyboardInterrupt still propagate.
            import logging
            logging.getLogger(__name__).debug(
                "Ignoring error while closing stale connection pool", exc_info=True
            )

    if _pool is None:
        _pool = await asyncpg.create_pool(
            database=os.getenv("pg_dbname"),
            user=os.getenv("pg_user"),
            password=os.getenv("pg_password"),
            host=os.getenv("pg_host"),
            port=os.getenv("pg_port"),
            min_size=min_size,
            max_size=max_size,
            # NOTE(review): `setup` is presumably used so the search_path is
            # re-applied on every acquire - confirm against asyncpg docs
            # before changing it to a one-time hook.
            setup=lambda conn: conn.execute(f'SET search_path TO {schema}')
        )
    return _pool
38
+
39
@asynccontextmanager
async def get_async_connection():
    """Yield a pooled connection and hand it back to the pool on exit.

    The connection is released whether the ``async with`` body finishes
    normally or raises.
    """
    shared_pool = await get_pool()
    async with shared_pool.acquire() as connection:
        yield connection
48
+
49
async def close_pool():
    """Close the shared connection pool, if one exists.

    The module-level reference is cleared before the pool is closed, so it is
    reset even when ``close()`` raises. Any such error still propagates to
    the caller.
    """
    global _pool
    pool, _pool = _pool, None
    if pool is not None:
        await pool.close()
55
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ asyncpg
2
+ gradio
3
+ python-dotenv