Spaces:
Sleeping
Sleeping
File size: 8,795 Bytes
a23bdc6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 |
import asyncio
from typing import Optional
import gradio as gr
import pandas as pd
from data_access import get_pool, get_async_connection, close_pool
# Module-level caches. They start empty and are filled in by initialize_data().

# Raw rows as returned by get_questions() / get_source_finders().
questions: list = []
source_finders: list = []

# id -> text / id -> name lookups derived from the rows above.
questions_dict: dict = {}
source_finders_dict: dict = {}

# Values offered by the two dropdowns.
question_options: list = []
finder_options: list = []

# Display labels for finder options; replaced wholesale by initialize_data().
finder_labels: dict = {"All": "All Source Finders"}
async def get_questions():
    """Fetch all questions, ordered by id.

    Returns:
        A list of ``{"id": ..., "text": ...}`` dicts, one per row of the
        ``questions`` table (``question_text`` is exposed as ``"text"``).
    """
    sql = "SELECT id, question_text FROM questions ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        return [{"id": row["id"], "text": row["question_text"]} for row in rows]
async def get_source_finders():
    """Fetch all source finders, ordered by id.

    Returns:
        A list of ``{"id": ..., "name": ...}`` dicts, where ``"name"`` is the
        ``source_finder_type`` column of the ``source_finders`` table.
    """
    sql = "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        return [{"id": row["id"], "name": row["name"]} for row in rows]
async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
    """Return the distinct run ids recorded for a question, in ascending order.

    Args:
        question_id: Question whose runs are listed.
        source_finder_id: When given (and not the sentinel ``"All"``), only
            runs produced by that source finder are considered.

    Returns:
        A list of ``run_id`` values from ``source_runs``.
    """
    args = [question_id]
    finder_clause = ""
    # Both None and the "All" sentinel mean "do not filter by finder".
    if not (source_finder_id is None or source_finder_id == "All"):
        finder_clause = " AND source_finder_id = $2"
        args.append(source_finder_id)
    sql = (
        "SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"
        + finder_clause
        + " ORDER BY run_id"
    )
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql, *args)
        return [row["run_id"] for row in rows]
async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
                          run_id: Optional[int] = None):
    """Fetch source-run rows for one question, joined with the finder name.

    Args:
        question_id: Question whose source runs are returned.
        source_finder_id: Restrict the result to this source finder. ``None``
            means no finder filter.
        run_id: Run to return. ``None`` selects run 1, which is the run this
            function always returned while the value was hard-coded in the
            query, so callers that omit ``run_id`` see the same rows as before.

    Returns:
        A list of dicts, one per matching row: every ``source_runs`` column
        plus ``finder_name`` (the finder's ``source_finder_type``), ordered by
        ``run_id`` and then by ``rank`` descending.
    """
    default_run_id = 1
    effective_run_id = default_run_id if run_id is None else run_id

    query = """
        SELECT sr.*, sf.source_finder_type as finder_name
        FROM source_runs sr
        JOIN source_finders sf ON sr.source_finder_id = sf.id
        WHERE sr.question_id = $1 AND sr.run_id = $2
    """
    params = [question_id, effective_run_id]

    # Compare against None explicitly so that a finder id of 0 still filters.
    if source_finder_id is not None:
        params.append(source_finder_id)
        query += f" AND sr.source_finder_id = ${len(params)}"

    query += " ORDER BY sr.run_id, sr.rank DESC"

    async with get_async_connection() as conn:
        rows = await conn.fetch(query, *params)
        return [dict(row) for row in rows]
async def initialize_data():
    """Load questions and source finders and rebuild the module-level caches.

    Populates ``questions``, ``source_finders``, the two id lookup dicts, and
    the option/label collections used by the dropdowns. ``finder_labels`` is
    replaced outright, so its initial ``"All"`` entry does not survive.
    """
    global questions, source_finders, questions_dict, source_finders_dict, question_options, finder_options, finder_labels

    loaded_questions = await get_questions()
    loaded_finders = await get_source_finders()

    # Build everything from the freshly loaded rows first, then publish.
    text_by_question_id = {}
    dropdown_questions = []
    for item in loaded_questions:
        text_by_question_id[item["id"]] = item["text"]
        dropdown_questions.append(f"{item['id']}: {item['text']}")

    name_by_finder_id = {}
    dropdown_finders = []
    label_by_finder = {}
    for item in loaded_finders:
        finder_key = str(item["id"])
        name_by_finder_id[item["id"]] = item["name"]
        dropdown_finders.append(finder_key)
        label_by_finder[finder_key] = item["name"]

    questions = loaded_questions
    source_finders = loaded_finders
    questions_dict = text_by_question_id
    source_finders_dict = name_by_finder_id
    question_options = dropdown_questions
    finder_options = dropdown_finders
    finder_labels = label_by_finder
def update_source_runs(question_option, source_finder_id):
    """Gradio handler: load the source runs for the selected question/finder.

    Args:
        question_option: Question dropdown value in the form
            ``"<id>: <text>"``; a falsy value means nothing is selected.
        source_finder_id: Finder dropdown value (a finder id as a string).
            ``None``, an empty value or ``"All"`` means "do not filter".

    Returns:
        A 3-tuple ``(table, message, csv_path)`` in the order of the three
        outputs this handler is wired to (results table, result text,
        download button). ``table`` is a DataFrame or ``None``; ``csv_path``
        is the path of a CSV file with the full result or ``None``.
    """
    import tempfile  # local import: only this handler needs it

    # Every return must yield exactly three values, one per wired output.
    if not question_option:
        return None, "No question selected", None

    # The option text starts with the numeric question id.
    question_id = int(question_option.split(":")[0])

    # The finder dropdown has no initial selection, so None must be accepted.
    if not source_finder_id or source_finder_id == "All":
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))
    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show the curated column subset when all of it is present, else everything.
    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
    if all(col in df.columns for col in columns_to_display):
        df_display = df[columns_to_display]
    else:
        df_display = df

    # The download button expects a file path, not CSV text, so the full
    # result is written to a temp file. It is not deleted here because it
    # must outlive this call for the download to work.
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="source_runs_",
        suffix=".csv",
        delete=False,
        encoding="utf-8",
        newline="",
    ) as csv_file:
        df.to_csv(csv_file, index=False)
        csv_path = csv_file.name

    result_message = f"Found {len(source_runs)} results"
    return df_display, result_message, csv_path
def update_run_ids(question_option, source_finder_id):
    """Gradio handler: list the run ids available for the current selection.

    Args:
        question_option: Question dropdown value in the form
            ``"<id>: <text>"``; a falsy value means nothing is selected.
        source_finder_id: Finder dropdown value (a finder id as a string).
            ``None``, an empty value or ``"All"`` means "do not filter".

    Returns:
        A 4-tuple ``(run_id_options, None, message, None)``. ``run_id_options``
        is ``["All"]`` followed by the available run ids as strings, or an
        empty list when no question is selected.
    """
    if not question_option:
        return [], None, "No question selected", None

    # The option text starts with the numeric question id.
    question_id = int(question_option.split(":")[0])

    # Accept an unset/empty finder as well as the "All" sentinel instead of
    # crashing in int().
    if not source_finder_id or source_finder_id == "All":
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]
    return run_id_options, None, "", None
async def main():
    """Load the reference data, build the Gradio UI and launch the app.

    Awaits ``get_pool()`` (presumably creating the DB connection pool -
    confirm in ``data_access``) and ``initialize_data()`` before building the
    interface, so the dropdowns and sidebar reflect the database contents at
    start-up.
    """
    await get_pool()
    await initialize_data()

    # NOTE(review): close_pool is imported by this module but never called;
    # confirm its signature in data_access and call it on shutdown.

    with gr.Blocks(title="Source Runs Explorer") as app:
        gr.Markdown("# Source Runs Explorer")
        with gr.Row():
            with gr.Column(scale=3):
                # Main content area
                question_dropdown = gr.Dropdown(
                    choices=question_options,
                    label="Select Question",
                    interactive=True
                )
                with gr.Row():
                    source_finder_dropdown = gr.Dropdown(
                        choices=finder_options,
                        label="Source Finder",
                        interactive=True
                    )
                    # NOTE: there is currently no run-id dropdown; results
                    # are not filterable by run from the UI.
                result_text = gr.Markdown("Select a question to view source runs")
                results_table = gr.DataFrame(
                    headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                    interactive=False
                )
                download_button = gr.DownloadButton(
                    label="Download Results as CSV",
                    interactive=True,
                    visible=True
                )
            with gr.Column(scale=1):
                # Sidebar area
                gr.Markdown("### About")
                gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")
                gr.Markdown("### Statistics")
                gr.Markdown(f"Total Questions: {len(questions)}")
                gr.Markdown(f"Source Finders: {len(source_finders)}")
                gr.Markdown("### Source Finders")
                for f in source_finders:
                    gr.Markdown(f"**{f['id']}**: {f['name']}")

        # Refresh the results whenever either filter changes. Each dropdown
        # is registered exactly once so a change triggers a single query.
        question_dropdown.change(
            update_source_runs,
            inputs=[question_dropdown, source_finder_dropdown],
            outputs=[results_table, result_text, download_button]
        )
        source_finder_dropdown.change(
            update_source_runs,
            inputs=[question_dropdown, source_finder_dropdown],
            outputs=[results_table, result_text, download_button]
        )

    app.queue()
    app.launch()
# Script entry point: run the async main() when executed directly.
if __name__ == "__main__":
    asyncio.run(main())