"""Gradio app for browsing source runs recorded for a set of questions.

Questions and source finders are loaded once at startup; selecting a question
(and optionally a source finder) queries the matching source runs, shows them
in a table and offers them as a CSV download.
"""

import asyncio
import inspect
import tempfile
from typing import Any, Dict, List, Optional, Tuple

import gradio as gr
import pandas as pd

from data_access import get_pool, get_async_connection, close_pool

# Module-level state, populated once by initialize_data().
questions = []
source_finders = []
questions_dict = {}
source_finders_dict = {}
question_options = []
finder_options = []
finder_labels = {"All": "All Source Finders"}

# Run used by get_source_runs() when the caller does not ask for a specific one.
_DEFAULT_RUN_ID = 1

# Columns shown in the results table, in display order.
_DISPLAY_COLUMNS = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']


async def get_questions() -> List[Dict[str, Any]]:
    """Return every question as ``{"id": ..., "text": ...}``, ordered by id."""
    async with get_async_connection() as conn:
        rows = await conn.fetch("SELECT id, question_text FROM questions ORDER BY id")
        return [{"id": row["id"], "text": row["question_text"]} for row in rows]


async def get_source_finders() -> List[Dict[str, Any]]:
    """Return every source finder as ``{"id": ..., "name": ...}``, ordered by id."""
    async with get_async_connection() as conn:
        rows = await conn.fetch(
            "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
        )
        return [{"id": row["id"], "name": row["name"]} for row in rows]


async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None) -> List[Any]:
    """Return the distinct run ids recorded for a question, in ascending order.

    Args:
        question_id: Question whose runs are listed.
        source_finder_id: When given (and not the string ``"All"``), only runs
            produced by that source finder are listed.
    """
    async with get_async_connection() as conn:
        query = "SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"
        params = [question_id]
        if source_finder_id != "All" and source_finder_id is not None:
            query += " AND source_finder_id = $2"
            params.append(source_finder_id)
        query += " ORDER BY run_id"
        rows = await conn.fetch(query, *params)
        return [row["run_id"] for row in rows]


async def get_source_runs(
    question_id: int,
    source_finder_id: Optional[int] = None,
    run_id: Optional[int] = None,
) -> List[Dict[str, Any]]:
    """Return the source runs of one run of a question as a list of dicts.

    Each dict holds all ``source_runs`` columns plus ``finder_name`` (the
    finder's ``source_finder_type``). Rows are ordered by run id, then by rank
    descending.

    Args:
        question_id: Question whose source runs are fetched.
        source_finder_id: Optional source finder to restrict the results to.
        run_id: Run to fetch. ``None`` selects run 1, which is what every
            call returned before this parameter was honoured.
    """
    async with get_async_connection() as conn:
        query = """
            SELECT sr.*, sf.source_finder_type as finder_name
            FROM source_runs sr
            JOIN source_finders sf ON sr.source_finder_id = sf.id
            WHERE sr.question_id = $1 and sr.run_id = $2
        """
        params = [question_id, _DEFAULT_RUN_ID if run_id is None else run_id]
        # Compare against None so that a finder id of 0 is still applied.
        if source_finder_id is not None:
            params.append(source_finder_id)
            # The placeholder index is the position of the value just appended.
            query += f" AND sr.source_finder_id = ${len(params)}"
        query += " ORDER BY sr.run_id, sr.rank DESC"
        rows = await conn.fetch(query, *params)
        return [dict(row) for row in rows]


async def initialize_data():
    """Load questions and source finders and rebuild the module-level lookups."""
    global questions, source_finders, questions_dict, source_finders_dict, \
        question_options, finder_options, finder_labels

    questions = await get_questions()
    source_finders = await get_source_finders()

    # Dictionaries for id -> text/name lookup.
    questions_dict = {q["id"]: q["text"] for q in questions}
    source_finders_dict = {f["id"]: f["name"] for f in source_finders}

    # Dropdown choices; the question id prefix is parsed back out by the handlers.
    question_options = [f"{q['id']}: {q['text']}" for q in questions]
    finder_options = [str(f["id"]) for f in source_finders]
    finder_labels = {str(f["id"]): f["name"] for f in source_finders}


def _parse_question_id(question_option: str) -> int:
    """Extract the numeric id from an ``"<id>: <text>"`` dropdown choice."""
    return int(question_option.partition(":")[0])


def _parse_finder_id(source_finder_id) -> Optional[int]:
    """Convert a finder dropdown value to an int id, or None for "no filter".

    An unselected dropdown (``None``), an empty value and ``"All"`` all mean
    that no source finder filter is applied.
    """
    if not source_finder_id or source_finder_id == "All":
        return None
    return int(source_finder_id)


def _write_csv_file(df: pd.DataFrame) -> str:
    """Write *df* to a new temporary CSV file and return the file's path.

    The file is not deleted here because it has to outlive the handler so it
    can be downloaded afterwards.
    """
    with tempfile.NamedTemporaryFile(
        mode="w",
        prefix="source_runs_",
        suffix=".csv",
        delete=False,
        encoding="utf-8",
        newline="",
    ) as handle:
        df.to_csv(handle, index=False)
        return handle.name


def update_source_runs(
    question_option, source_finder_id
) -> Tuple[Optional[pd.DataFrame], str, Optional[str]]:
    """Gradio handler: fetch and format source runs for the current selection.

    Args:
        question_option: Selected question choice (``"<id>: <text>"``) or a
            falsy value when nothing is selected.
        source_finder_id: Selected source finder id as a string, or
            ``None``/empty/``"All"`` for no finder filter.

    Returns:
        A 3-tuple matching the handler's outputs, in order: the table data
        (or None), the status message, and the path of a CSV file for the
        download button (or None).
    """
    if not question_option:
        return None, "No question selected", None

    question_id = _parse_question_id(question_option)
    finder_id_int = _parse_finder_id(source_finder_id)

    # NOTE(review): asyncio.run() uses a fresh event loop per call, while the
    # pool was created on main()'s loop. Whether connections may be shared
    # across loops depends on data_access - confirm.
    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))

    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show the curated columns when they are all present, else everything.
    if all(col in df.columns for col in _DISPLAY_COLUMNS):
        df_display = df[_DISPLAY_COLUMNS]
    else:
        df_display = df

    # The download contains every column, not just the displayed ones.
    csv_path = _write_csv_file(df)
    result_message = f"Found {len(source_runs)} results"

    return df_display, result_message, csv_path


def update_run_ids(question_option, source_finder_id):
    """Return the run-id choices available for the current selection.

    Not wired to any UI component in this file; kept for a run-id dropdown.

    Returns:
        A 4-tuple ``(run_id_options, None, message, None)`` where
        ``run_id_options`` is ``["All", ...]`` followed by the run ids as
        strings, or an empty list when no question is selected.
    """
    if not question_option:
        return [], None, "No question selected", None

    question_id = _parse_question_id(question_option)
    finder_id_int = _parse_finder_id(source_finder_id)

    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]

    return run_id_options, None, "", None


def _build_app() -> gr.Blocks:
    """Build the Gradio UI from the already-initialized module-level data."""
    with gr.Blocks(title="Source Runs Explorer") as app:
        gr.Markdown("# Source Runs Explorer")

        with gr.Row():
            with gr.Column(scale=3):
                # Main content area
                question_dropdown = gr.Dropdown(
                    choices=question_options,
                    label="Select Question",
                    interactive=True
                )

                with gr.Row():
                    source_finder_dropdown = gr.Dropdown(
                        choices=finder_options,
                        label="Source Finder",
                        interactive=True
                    )

                result_text = gr.Markdown("Select a question to view source runs")

                results_table = gr.DataFrame(
                    headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                    interactive=False
                )

                download_button = gr.DownloadButton(
                    label="Download Results as CSV",
                    interactive=True,
                    visible=True
                )

            with gr.Column(scale=1):
                # Sidebar area
                gr.Markdown("### About")
                gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")

                gr.Markdown("### Statistics")
                gr.Markdown(f"Total Questions: {len(questions)}")
                gr.Markdown(f"Source Finders: {len(source_finders)}")

                gr.Markdown("### Source Finders")
                for f in source_finders:
                    gr.Markdown(f"**{f['id']}**: {f['name']}")

        # Both dropdowns refresh the same three outputs. Each listener is
        # registered exactly once so a change triggers a single query.
        for dropdown in (question_dropdown, source_finder_dropdown):
            dropdown.change(
                update_source_runs,
                inputs=[question_dropdown, source_finder_dropdown],
                outputs=[results_table, result_text, download_button]
            )

    return app


async def main():
    """Create the pool, load the data, serve the UI, and close the pool on exit."""
    await get_pool()
    try:
        await initialize_data()
        app = _build_app()
        app.queue()
        app.launch()
    finally:
        # close_pool's signature is not visible here; await it only if it
        # returns an awaitable.
        outcome = close_pool()
        if inspect.isawaitable(outcome):
            await outcome


if __name__ == "__main__":
    asyncio.run(main())