Spaces:
Sleeping
Sleeping
import asyncio | |
from typing import Optional | |
import gradio as gr | |
import pandas as pd | |
from data_access import get_pool, get_async_connection, close_pool | |
# Module-level caches. They start out empty and are rebound by
# initialize_data() once the database has been queried.
questions, source_finders = [], []
questions_dict, source_finders_dict = {}, {}
question_options, finder_options = [], []
# Placeholder label map until the real finder labels are loaded.
finder_labels = {"All": "All Source Finders"}
async def get_questions():
    """Fetch every row of the ``questions`` table, ordered by id.

    Returns:
        A list of ``{"id": ..., "text": ...}`` dicts, where ``text`` is the
        row's ``question_text`` column.
    """
    async with get_async_connection() as conn:
        rows = await conn.fetch("SELECT id, question_text FROM questions ORDER BY id")
        result = []
        for row in rows:
            result.append({"id": row["id"], "text": row["question_text"]})
        return result
async def get_source_finders():
    """Fetch every row of the ``source_finders`` table, ordered by id.

    Returns:
        A list of ``{"id": ..., "name": ...}`` dicts, where ``name`` is the
        row's ``source_finder_type`` column (aliased in the query).
    """
    async with get_async_connection() as conn:
        rows = await conn.fetch("SELECT id, source_finder_type as name FROM source_finders ORDER BY id")
        result = []
        for row in rows:
            result.append({"id": row["id"], "name": row["name"]})
        return result
async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
    """Return the distinct run ids recorded in ``source_runs`` for a question.

    Args:
        question_id: Value matched against ``source_runs.question_id``.
        source_finder_id: Optional finder filter. ``None`` and the string
            ``"All"`` both mean "do not filter by finder".

    Returns:
        A list of ``run_id`` values in ascending order.
    """
    skip_finder_filter = source_finder_id is None or source_finder_id == "All"

    # Assemble the statement piecewise; the finder clause is optional.
    pieces = ["SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"]
    args = [question_id]
    if not skip_finder_filter:
        pieces.append(" AND source_finder_id = $2")
        args.append(source_finder_id)
    pieces.append(" ORDER BY run_id")

    async with get_async_connection() as conn:
        records = await conn.fetch("".join(pieces), *args)
        return [record["run_id"] for record in records]
async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
                          run_id: Optional[int] = None):
    """Fetch source-run rows for one question, joined with the finder name.

    Args:
        question_id: Value matched against ``source_runs.question_id``.
        source_finder_id: Optional finder filter. Any falsy value (``None``,
            ``0``, ``""``) disables the filter.
        run_id: Run to fetch. ``None`` selects run 1, which is the run this
            function always returned while the value was hard-coded in the
            SQL, so callers that omit the argument see no change.

    Returns:
        A list of dicts, one per row, holding every ``source_runs`` column
        plus ``finder_name``; ordered by run id, then by descending rank.
    """
    # Previously the run was fixed to 1 inside the SQL text and the run_id
    # argument was ignored; bind it as a parameter instead.
    effective_run_id = 1 if run_id is None else run_id

    query = """
        SELECT sr.*, sf.source_finder_type as finder_name
        FROM source_runs sr
        JOIN source_finders sf ON sr.source_finder_id = sf.id
        WHERE sr.question_id = $1 AND sr.run_id = $2
    """
    params = [question_id, effective_run_id]

    if source_finder_id:
        params.append(source_finder_id)
        # The placeholder index is simply the position of the value just added.
        query += f" AND sr.source_finder_id = ${len(params)}"

    query += " ORDER BY sr.run_id, sr.rank DESC"

    async with get_async_connection() as conn:
        sources = await conn.fetch(query, *params)
        return [dict(s) for s in sources]
async def initialize_data():
    """Load questions and source finders and rebuild the module-level caches.

    Rebinds ``questions``, ``source_finders``, the two id lookup dicts, and
    the dropdown option lists / label map derived from them.
    """
    global questions, source_finders
    global questions_dict, source_finders_dict
    global question_options, finder_options, finder_labels

    questions = await get_questions()
    source_finders = await get_source_finders()

    # id -> text and id -> name lookups.
    questions_dict = {item["id"]: item["text"] for item in questions}
    source_finders_dict = {item["id"]: item["name"] for item in source_finders}

    # Dropdown entries: questions are shown as "<id>: <text>", finders are
    # offered by their stringified id.
    question_options = ["{}: {}".format(item["id"], item["text"]) for item in questions]
    finder_options = [str(item["id"]) for item in source_finders]
    finder_labels = dict(zip(finder_options, (item["name"] for item in source_finders)))
def update_source_runs(question_option, source_finder_id):
    """Gradio handler: load the source runs for the selected question.

    Args:
        question_option: Dropdown value formatted as ``"<id>: <text>"``;
            falsy when nothing is selected.
        source_finder_id: Dropdown value holding a finder id as a string.
            ``None``, an empty value, or ``"All"`` means no finder filter.

    Returns:
        A 3-tuple ``(table, message, csv_path)``, one item per output
        component this handler is wired to (results table, result text,
        download button). ``table`` and ``csv_path`` are ``None`` when there
        is nothing to show.
    """
    import tempfile

    # Every exit returns exactly three values; the handler has three outputs.
    if not question_option:
        return None, "No question selected", None

    # The option text starts with the numeric id followed by a colon.
    question_id = int(question_option.split(":")[0])

    # An unselected dropdown delivers None, so test for "no filter" before
    # attempting any conversion.
    if not source_finder_id or source_finder_id == "All":
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))
    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show the curated column subset only when all of it is present;
    # otherwise fall back to the full frame.
    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
    df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df

    # The download button is given a file path rather than CSV text, so the
    # full result set is written to a temporary file. The file is left on
    # disk (delete=False) so it still exists when the download is served.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix=f"source_runs_q{question_id}_",
        delete=False,
        encoding="utf-8",
        newline="",
    ) as handle:
        df.to_csv(handle, index=False)
        csv_path = handle.name

    result_message = f"Found {len(source_runs)} results"
    return df_display, result_message, csv_path
def update_run_ids(question_option, source_finder_id):
    """Gradio handler: list the run ids available for the current selection.

    Args:
        question_option: Dropdown value formatted as ``"<id>: <text>"``;
            falsy when nothing is selected.
        source_finder_id: Finder id as a string. ``None``, an empty value,
            or ``"All"`` means no finder filter.

    Returns:
        A 4-tuple ``(run_id_options, None, message, None)``. The options
        list starts with ``"All"`` followed by the run ids as strings, or is
        empty when no question is selected.
    """
    # NOTE(review): no event handler in the visible code is wired to this
    # function (the run-id dropdown is commented out), so the 4-item return
    # shape is kept exactly as it was.
    if not question_option:
        return [], None, "No question selected", None

    # The option text starts with the numeric id followed by a colon.
    question_id = int(question_option.split(":")[0])

    # Treat None / empty like "All" instead of failing inside int().
    if not source_finder_id or source_finder_id == "All":
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]
    return run_id_options, None, "", None
async def main():
    """Build and launch the "Source Runs Explorer" Gradio app.

    Awaits ``get_pool()``, loads the dropdown data through
    ``initialize_data()``, lays out the UI, wires both dropdowns to
    ``update_source_runs`` and launches the app. ``close_pool()`` is called on
    the way out, whether launch returned normally or an error occurred.
    """
    await get_pool()
    try:
        await initialize_data()

        with gr.Blocks(title="Source Runs Explorer") as app:
            gr.Markdown("# Source Runs Explorer")
            with gr.Row():
                with gr.Column(scale=3):
                    # Main content area
                    question_dropdown = gr.Dropdown(
                        choices=question_options,
                        label="Select Question",
                        interactive=True
                    )
                    with gr.Row():
                        source_finder_dropdown = gr.Dropdown(
                            choices=finder_options,
                            label="Source Finder",
                            interactive=True
                        )
                    result_text = gr.Markdown("Select a question to view source runs")
                    results_table = gr.DataFrame(
                        headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                        interactive=False
                    )
                    download_button = gr.DownloadButton(
                        label="Download Results as CSV",
                        interactive=True,
                        visible=True
                    )
                with gr.Column(scale=1):
                    # Sidebar area
                    gr.Markdown("### About")
                    gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                    gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")
                    gr.Markdown("### Statistics")
                    gr.Markdown(f"Total Questions: {len(questions)}")
                    gr.Markdown(f"Source Finders: {len(source_finders)}")
                    gr.Markdown("### Source Finders")
                    for f in source_finders:
                        gr.Markdown(f"**{f['id']}**: {f['name']}")

            # Set up event handlers. Each dropdown gets exactly one listener;
            # registering the question listener twice would run the query
            # twice for every change.
            handler_inputs = [question_dropdown, source_finder_dropdown]
            handler_outputs = [results_table, result_text, download_button]
            for dropdown in handler_inputs:
                dropdown.change(
                    update_source_runs,
                    inputs=handler_inputs,
                    outputs=handler_outputs
                )

        app.queue()
        app.launch()
    finally:
        # Ensure we clean up when done. close_pool comes from data_access and
        # its definition is not visible here, so support both a plain function
        # and one that returns an awaitable.
        pending_close = close_pool()
        if hasattr(pending_close, "__await__"):
            await pending_close
if __name__ == "__main__":
    # Script entry point: drive the async main() to completion on a new
    # event loop.
    entry_coroutine = main()
    asyncio.run(entry_coroutine)