Spaces:
Running
Running
initial commit
Browse files- .gitignore +3 -0
- app.py +245 -0
- data_access.py +55 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.env
|
2 |
+
.idea/*
|
3 |
+
*.iml
|
app.py
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
from typing import Optional
|
3 |
+
import gradio as gr
|
4 |
+
import pandas as pd
|
5 |
+
from data_access import get_pool, get_async_connection, close_pool
|
6 |
+
|
7 |
+
# Initialize data at the module level
|
8 |
+
# Module-level caches. initialize_data() rebinds every one of these names
# before the UI is built, so the values below are only placeholders.

# Rows from the query helpers: lists of {"id", "text"} / {"id", "name"} dicts.
questions = []
source_finders = []
# Lookup tables keyed by id.
questions_dict = {}
source_finders_dict = {}
# Pre-formatted choice lists handed to the dropdown widgets.
question_options = []
finder_options = []
# String id -> finder name. The "All" entry is only this initial default;
# initialize_data() replaces the whole dict.
finder_labels = {"All": "All Source Finders"}
|
15 |
+
|
16 |
+
|
17 |
+
# Get all questions
|
18 |
+
async def get_questions():
    """Load every row of the ``questions`` table, ordered by id.

    Returns:
        A list of ``{"id": ..., "text": ...}`` dicts, one per question.
    """
    sql = "SELECT id, question_text FROM questions ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        return [dict(id=row["id"], text=row["question_text"]) for row in rows]
|
22 |
+
|
23 |
+
|
24 |
+
# Get distinct source finders
|
25 |
+
async def get_source_finders():
    """Load every row of the ``source_finders`` table, ordered by id.

    Returns:
        A list of ``{"id": ..., "name": ...}`` dicts, where ``name`` is the
        row's ``source_finder_type`` column.
    """
    sql = "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        return [dict(id=row["id"], name=row["name"]) for row in rows]
|
29 |
+
|
30 |
+
|
31 |
+
# Get distinct run IDs for a question
|
32 |
+
async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
    """List the distinct run ids recorded for a question.

    Args:
        question_id: Question whose runs are listed.
        source_finder_id: When given (and not the ``"All"`` placeholder),
            restricts the result to runs produced by that source finder.

    Returns:
        The distinct ``run_id`` values from ``source_runs``, in ascending order.
    """
    sql_parts = ["SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"]
    args = [question_id]

    # None and the "All" placeholder both mean "do not filter by finder".
    if source_finder_id not in (None, "All"):
        sql_parts.append(" AND source_finder_id = $2")
        args.append(source_finder_id)

    sql_parts.append(" ORDER BY run_id")

    async with get_async_connection() as conn:
        rows = await conn.fetch("".join(sql_parts), *args)
        return [row["run_id"] for row in rows]
|
45 |
+
|
46 |
+
|
47 |
+
# Get source runs for a specific question with filters
|
48 |
+
async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
                          run_id: Optional[int] = None):
    """Fetch the source-run rows for a question, optionally filtered.

    Args:
        question_id: Question whose source runs are fetched.
        source_finder_id: When not ``None``, only rows produced by this
            source finder are returned.
        run_id: Run to fetch. When ``None`` the query keeps its historical
            behaviour and returns run 1 only.

    Returns:
        A list of dicts, one per ``source_runs`` row, each extended with a
        ``finder_name`` key taken from ``source_finders.source_finder_type``.
        Rows are ordered by ``run_id`` and then by ``rank`` descending.
    """
    query = """
        SELECT sr.*, sf.source_finder_type as finder_name
        FROM source_runs sr
        JOIN source_finders sf ON sr.source_finder_id = sf.id
        WHERE sr.question_id = $1
    """
    params = [question_id]

    if run_id is None:
        # Existing callers never pass run_id and rely on seeing run 1 only.
        query += " AND sr.run_id = 1"
    else:
        params.append(run_id)
        query += f" AND sr.run_id = ${len(params)}"

    # Compare against None (as get_run_ids does) so an id of 0 still filters.
    if source_finder_id is not None:
        params.append(source_finder_id)
        query += f" AND sr.source_finder_id = ${len(params)}"

    query += " ORDER BY sr.run_id, sr.rank DESC"

    async with get_async_connection() as conn:
        rows = await conn.fetch(query, *params)
        return [dict(row) for row in rows]
|
75 |
+
|
76 |
+
|
77 |
+
# Initialize data in a single async function
|
78 |
+
async def initialize_data():
    """Populate the module-level caches from the database.

    Loads all questions and source finders, then derives the id lookup
    dicts and the pre-formatted dropdown option lists from them. Every
    module-level cache name is rebound.
    """
    global questions, source_finders, questions_dict, source_finders_dict, question_options, finder_options, finder_labels

    questions = await get_questions()
    source_finders = await get_source_finders()

    # Questions: id -> text lookup plus "<id>: <text>" dropdown labels.
    text_by_id = {}
    labels = []
    for entry in questions:
        text_by_id[entry["id"]] = entry["text"]
        labels.append("{}: {}".format(entry["id"], entry["text"]))
    questions_dict = text_by_id
    question_options = labels

    # Source finders: id -> name lookup, string ids for the dropdown, and
    # a string-id -> name mapping.
    name_by_id = {}
    id_strings = []
    name_by_id_string = {}
    for entry in source_finders:
        key = str(entry["id"])
        name_by_id[entry["id"]] = entry["name"]
        id_strings.append(key)
        name_by_id_string[key] = entry["name"]
    source_finders_dict = name_by_id
    finder_options = id_strings
    finder_labels = name_by_id_string
|
92 |
+
|
93 |
+
|
94 |
+
# Main function to handle UI interactions
|
95 |
+
def update_source_runs(question_option, source_finder_id):
    """Query source runs for the current selection and format them for the UI.

    Args:
        question_option: Question dropdown value of the form
            ``"<id>: <text>"``; falsy when nothing is selected.
        source_finder_id: Source-finder dropdown value (an id as a string).
            ``None``, ``""`` and ``"All"`` all mean "no finder filter".

    Returns:
        A 3-tuple ``(table, message, csv_path)`` matching the three output
        components the change handlers are wired to (results table, status
        text, download button). ``table`` and ``csv_path`` are ``None`` when
        there is nothing to show.
    """
    import tempfile

    # Every return path yields exactly three values; the handlers declare
    # three outputs, so a fourth value is rejected by Gradio.
    if not question_option:
        return None, "No question selected", None

    # The option label is "<id>: <text>"; the id is everything before ":".
    question_id = int(question_option.split(":")[0])

    # An untouched dropdown delivers None, which has no len(); treat every
    # "nothing chosen" spelling the same way.
    if source_finder_id in (None, "", "All"):
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    # The handler runs synchronously, so drive the coroutine to completion here.
    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))

    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show the curated column subset when all of them exist; otherwise fall
    # back to the full frame rather than raising on a missing column.
    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
    df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df

    # The download button takes a file path, not file contents, so write the
    # full (unfiltered-column) frame to a temp file and hand back its path.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix=f"source_runs_q{question_id}_",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as csv_file:
        df.to_csv(csv_file, index=False)
        csv_path = csv_file.name

    result_message = f"Found {len(source_runs)} results"

    return df_display, result_message, csv_path
|
133 |
+
|
134 |
+
|
135 |
+
# Function to update run_id dropdown when question or source_finder changes
|
136 |
+
def update_run_ids(question_option, source_finder_id):
    """Compute the run-id dropdown choices for the current selection.

    Args:
        question_option: Question dropdown value of the form
            ``"<id>: <text>"``; falsy when nothing is selected.
        source_finder_id: Source-finder dropdown value (an id as a string).
            ``None``, ``""`` and ``"All"`` all mean "no finder filter".

    Returns:
        A 4-tuple ``(run_id_options, None, message, None)``.
        ``run_id_options`` is ``"All"`` followed by the available run ids
        as strings, or an empty list when no question is selected.
    """
    if not question_option:
        return [], None, "No question selected", None

    # The option label is "<id>: <text>"; the id is everything before ":".
    question_id = int(question_option.split(":")[0])

    # int(None) / int("") would raise, so map every "no filter" spelling to None.
    if source_finder_id in (None, "", "All"):
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]

    return run_id_options, None, "", None
|
151 |
+
|
152 |
+
|
153 |
+
|
154 |
+
# Create Gradio app
|
155 |
+
|
156 |
+
# Ensure we clean up when done
|
157 |
+
async def main():
    """Build and launch the Gradio UI.

    The connection pool is opened and the question / source-finder caches
    are loaded first, because the dropdown choices and the sidebar
    statistics are rendered from those module-level values while the
    layout is being constructed.
    """
    await get_pool()
    await initialize_data()

    with gr.Blocks(title="Source Runs Explorer") as app:
        gr.Markdown("# Source Runs Explorer")

        with gr.Row():
            with gr.Column(scale=3):
                # Main content area
                question_dropdown = gr.Dropdown(
                    choices=question_options,
                    label="Select Question",
                    interactive=True
                )

                with gr.Row():
                    source_finder_dropdown = gr.Dropdown(
                        choices=finder_options,
                        label="Source Finder",
                        interactive=True
                    )
                    # NOTE: there is no run-id dropdown at present; results
                    # are not filterable by run from the UI.

                result_text = gr.Markdown("Select a question to view source runs")

                results_table = gr.DataFrame(
                    headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                    interactive=False
                )

                download_button = gr.DownloadButton(
                    label="Download Results as CSV",
                    interactive=True,
                    visible=True
                )

            with gr.Column(scale=1):
                # Sidebar area
                gr.Markdown("### About")
                gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")

                gr.Markdown("### Statistics")
                gr.Markdown(f"Total Questions: {len(questions)}")
                gr.Markdown(f"Source Finders: {len(source_finders)}")

                gr.Markdown("### Source Finders")
                for f in source_finders:
                    gr.Markdown(f"**{f['id']}**: {f['name']}")

        # Refresh the results whenever either filter changes. Each dropdown
        # gets exactly one listener, so a change triggers a single query.
        for dropdown in (question_dropdown, source_finder_dropdown):
            dropdown.change(
                update_source_runs,
                inputs=[question_dropdown, source_finder_dropdown],
                outputs=[results_table, result_text, download_button]
            )

    app.queue()
    app.launch()
|
243 |
+
|
244 |
+
# Script entry point: run the async main() to completion when this file is
# executed directly (not when it is imported).
if __name__ == "__main__":
    asyncio.run(main())
|
data_access.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
from contextlib import asynccontextmanager
|
4 |
+
|
5 |
+
import asyncpg
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
|
8 |
+
# Global connection pool
|
9 |
+
# Shared pool instance; stays None until get_pool() creates it and is reset
# to None by close_pool().
_pool = None
# Load variables from a .env file into the environment at import time, so the
# pg_* settings are available when get_pool() reads them with os.getenv().
load_dotenv()
|
11 |
+
|
12 |
+
|
13 |
+
async def get_pool(schema="talmudexplore", min_size=2, max_size=5):
    """Initialize and return the connection pool with the specified schema.

    The pool is cached in the module-level ``_pool``. A cached pool is
    reused only when it belongs to the event loop that is running now; a
    pool created under a different loop is closed on a best-effort basis
    and replaced.

    Args:
        schema: Schema name placed on ``search_path`` for pooled connections.
            It is interpolated into the SQL text, so it must be a trusted
            identifier.
        min_size: Minimum number of connections kept in the pool.
        max_size: Maximum number of connections in the pool.

    Note:
        The arguments only take effect when a new pool is created; they are
        ignored when a cached pool is returned. Connection settings come
        from the ``pg_dbname``, ``pg_user``, ``pg_password``, ``pg_host``
        and ``pg_port`` environment variables.

    Returns:
        The shared ``asyncpg`` pool.
    """
    import logging

    global _pool

    current_loop = asyncio.get_running_loop()

    if _pool is not None and getattr(_pool, '_loop', None) is not current_loop:
        # Drop the stale pool from the global first so that a failed or
        # interrupted close can never leave it cached.
        stale_pool, _pool = _pool, None
        try:
            await stale_pool.close()
        except Exception:
            # Best effort: the owning loop may already be closed. Cancellation
            # and KeyboardInterrupt are deliberately not swallowed.
            logging.getLogger(__name__).debug(
                "Ignoring error while closing stale connection pool", exc_info=True
            )

    if _pool is None:
        _pool = await asyncpg.create_pool(
            database=os.getenv("pg_dbname"),
            user=os.getenv("pg_user"),
            password=os.getenv("pg_password"),
            host=os.getenv("pg_host"),
            port=os.getenv("pg_port"),
            min_size=min_size,
            max_size=max_size,
            # asyncpg runs `setup` on a connection right before acquire()
            # hands it out, so the search_path is applied on every checkout.
            setup=lambda conn: conn.execute(f'SET search_path TO {schema}')
        )
    return _pool
|
38 |
+
|
39 |
+
@asynccontextmanager
async def get_async_connection():
    """Yield a pooled connection and return it to the pool on exit.

    The connection is released whether the ``async with`` body completes
    normally or raises.
    """
    pool = await get_pool()
    # pool.acquire() used as an async context manager releases on exit.
    async with pool.acquire() as connection:
        yield connection
|
48 |
+
|
49 |
+
async def close_pool():
    """Close the shared connection pool, if one exists, and forget it."""
    global _pool
    if not _pool:
        return
    await _pool.close()
    _pool = None
|
55 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
asyncpg
|
2 |
+
gradio
|
3 |
+
python-dotenv
|