Spaces:

davidr70
/

eval_results

Sleeping

File size: 8,795 Bytes

a23bdc6

import asyncio
import tempfile
from typing import Optional

import gradio as gr
import pandas as pd

from data_access import get_pool, get_async_connection, close_pool

# Module-level state. Everything below starts empty and is (re)bound by
# initialize_data() once the database has been queried.
# Rows returned by get_questions(): dicts with "id" and "text" keys.
questions = []
# Rows returned by get_source_finders(): dicts with "id" and "name" keys.
source_finders = []
# Lookup of question id -> question text.
questions_dict = {}
# Lookup of source finder id -> source finder name.
source_finders_dict = {}
# Dropdown choices for questions, formatted as "<id>: <text>".
question_options = []
# Dropdown choices for source finders: the finder ids rendered as strings.
finder_options = []
# Stringified finder id -> finder name. NOTE: initialize_data() replaces this
# dict wholesale, so the "All" placeholder only exists before initialization.
finder_labels = {"All": "All Source Finders"}


# Get all questions
async def get_questions():
    """Load all questions, ordered by id.

    Returns:
        A list of dicts, one per row, each with an ``"id"`` key and a
        ``"text"`` key (taken from the ``question_text`` column).
    """
    sql = "SELECT id, question_text FROM questions ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        result = []
        for row in rows:
            result.append({"id": row["id"], "text": row["question_text"]})
        return result


# Get distinct source finders
async def get_source_finders():
    """Load all source finders, ordered by id.

    Returns:
        A list of dicts, one per row, each with an ``"id"`` key and a
        ``"name"`` key (the ``source_finder_type`` column aliased as ``name``).
    """
    sql = "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        result = []
        for row in rows:
            result.append({"id": row["id"], "name": row["name"]})
        return result


# Get distinct run IDs for a question
async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
    """Return the distinct run ids recorded for a question, in ascending order.

    Args:
        question_id: Id of the question whose runs are listed.
        source_finder_id: When given (and not the ``"All"`` placeholder),
            only runs produced by that source finder are considered.

    Returns:
        A list with the ``run_id`` value of every matching row.
    """
    sql_parts = ["SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"]
    args = [question_id]

    # Both "All" and None mean "do not filter by source finder".
    skip_finder_filter = source_finder_id == "All" or source_finder_id is None
    if not skip_finder_filter:
        sql_parts.append(" AND source_finder_id = $2")
        args.append(source_finder_id)

    sql_parts.append(" ORDER BY run_id")
    sql = "".join(sql_parts)

    async with get_async_connection() as conn:
        rows = await conn.fetch(sql, *args)
        return [row["run_id"] for row in rows]


# Get source runs for a specific question with filters
async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
                          run_id: Optional[int] = None):
    """Fetch the source-run rows of one run of a question.

    Args:
        question_id: Id of the question whose source runs are fetched.
        source_finder_id: When not ``None``, restrict the rows to this source
            finder.
        run_id: Run to fetch. ``None`` (the default) selects run 1, which is
            the run this function always returned before the parameter was
            honored, so existing callers see the same rows.

    Returns:
        A list of dicts, one per ``source_runs`` row, each extended with a
        ``finder_name`` key holding the finder's ``source_finder_type``.
        Rows are ordered by run id, then by rank descending.
    """
    # Default to the first run to stay backward-compatible with the previously
    # hard-coded "sr.run_id = 1" condition.
    effective_run_id = 1 if run_id is None else run_id

    query = """
        SELECT sr.*, sf.source_finder_type as finder_name
        FROM source_runs sr
        JOIN source_finders sf ON sr.source_finder_id = sf.id
        WHERE sr.question_id = $1 AND sr.run_id = $2
    """
    params = [question_id, effective_run_id]

    # Compare against None rather than relying on truthiness so that a
    # legitimate id of 0 is still applied as a filter.
    if source_finder_id is not None:
        params.append(source_finder_id)
        # The placeholder index is the 1-based position of the value just added.
        query += f" AND sr.source_finder_id = ${len(params)}"

    query += " ORDER BY sr.run_id, sr.rank DESC"

    async with get_async_connection() as conn:
        sources = await conn.fetch(query, *params)
        return [dict(s) for s in sources]


# Initialize data in a single async function
async def initialize_data():
    """Query the database once and rebind the module-level lookup tables.

    Rebinds ``questions``, ``source_finders``, ``questions_dict``,
    ``source_finders_dict``, ``question_options``, ``finder_options`` and
    ``finder_labels`` from the rows returned by ``get_questions()`` and
    ``get_source_finders()``.
    """
    global questions, source_finders, questions_dict, source_finders_dict, question_options, finder_options, finder_labels

    questions = await get_questions()
    source_finders = await get_source_finders()

    # Question lookups: id -> text, plus the "<id>: <text>" dropdown labels.
    text_by_question_id = {}
    question_choices = []
    for question in questions:
        text_by_question_id[question["id"]] = question["text"]
        question_choices.append(f"{question['id']}: {question['text']}")

    # Finder lookups: id -> name, the stringified ids used as dropdown
    # choices, and the stringified id -> name label map.
    name_by_finder_id = {}
    finder_choices = []
    label_by_finder_key = {}
    for finder in source_finders:
        finder_key = str(finder["id"])
        name_by_finder_id[finder["id"]] = finder["name"]
        finder_choices.append(finder_key)
        label_by_finder_key[finder_key] = finder["name"]

    questions_dict = text_by_question_id
    source_finders_dict = name_by_finder_id
    question_options = question_choices
    finder_options = finder_choices
    finder_labels = label_by_finder_key


# Main function to handle UI interactions
def _source_runs_csv_path(df):
    """Write *df* to a new temporary CSV file and return that file's path."""
    # delete=False: the file must outlive this function so the download button
    # can serve it. NOTE: these files are not removed by this module.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix="source_runs_",
        delete=False,
        encoding="utf-8",
        newline="",
    ) as handle:
        df.to_csv(handle, index=False)
        return handle.name


def update_source_runs(question_option, source_finder_id):
    """Handle a dropdown change: load the matching source runs for display.

    Args:
        question_option: Selected question label in ``"<id>: <text>"`` form,
            or a falsy value when nothing is selected.
        source_finder_id: Selected source finder id as a string. ``None``, an
            empty value, or ``"All"`` means "do not filter by finder".

    Returns:
        A 3-tuple matching the ``[results_table, result_text,
        download_button]`` outputs this handler is wired to:
        the DataFrame to show (or ``None``), a status message, and the path
        of a CSV file with the full result (or ``None``).
    """
    # Every return below yields exactly three values, one per output component.
    if not question_option:
        return None, "No question selected", None

    # The question id is the text before the first ":" of the label.
    question_id = int(question_option.split(":", 1)[0])

    # The dropdown value is None until the user picks a finder, so test for
    # "no selection" before converting instead of calling len() on it.
    if not source_finder_id or source_finder_id == "All":
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    # Each call runs the query on its own event loop via asyncio.run.
    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))

    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show the curated column subset when all of them are present; otherwise
    # fall back to showing every column.
    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
    if all(col in df.columns for col in columns_to_display):
        df_display = df[columns_to_display]
    else:
        df_display = df

    # The download button takes a file path, not CSV text, so export the full
    # (unfiltered-columns) frame to a file and hand over its path.
    csv_path = _source_runs_csv_path(df)

    result_message = f"Found {len(source_runs)} results"

    return df_display, result_message, csv_path


# Function to update run_id dropdown when question or source_finder changes
def update_run_ids(question_option, source_finder_id):
    """Compute the run-id choices for the selected question and source finder.

    Args:
        question_option: Selected question label in ``"<id>: <text>"`` form,
            or a falsy value when nothing is selected.
        source_finder_id: Source finder id as a string, or ``"All"`` for no
            finder filter.

    Returns:
        A 4-tuple: the list of run-id choices (``"All"`` followed by each
        available run id as a string), ``None``, a status message, ``None``.
    """
    if question_option:
        # The question id is the text before the first ":" of the label.
        id_text = question_option.split(":")[0]
        question_id = int(id_text)

        # "All" disables the finder filter; anything else must be an int id.
        if source_finder_id == "All":
            finder_id_int = None
        else:
            finder_id_int = int(source_finder_id)

        found_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
        run_id_options = ["All"]
        for found in found_run_ids:
            run_id_options.append(str(found))

        return run_id_options, None, "", None

    return [], None, "No question selected", None



# Create Gradio app

# Ensure we clean up when done
async def main():
    """Open the connection pool, build the Gradio UI, serve it, then clean up.

    The pool is opened first and the module-level lookup tables are loaded
    via ``initialize_data()`` before the UI is built, because the dropdown
    choices and sidebar statistics are read from those tables at build time.
    The pool is closed when the app stops or when setup fails.
    """
    await get_pool()
    try:
        await initialize_data()
        with gr.Blocks(title="Source Runs Explorer") as app:
            gr.Markdown("# Source Runs Explorer")

            with gr.Row():
                with gr.Column(scale=3):
                    # Main content area
                    question_dropdown = gr.Dropdown(
                        choices=question_options,
                        label="Select Question",
                        interactive=True
                    )

                    with gr.Row():
                        source_finder_dropdown = gr.Dropdown(
                            choices=finder_options,
                            label="Source Finder",
                            interactive=True
                        )

                    result_text = gr.Markdown("Select a question to view source runs")

                    results_table = gr.DataFrame(
                        headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                        interactive=False
                    )

                    download_button = gr.DownloadButton(
                        label="Download Results as CSV",
                        interactive=True,
                        visible=True
                    )

                with gr.Column(scale=1):
                    # Sidebar area
                    gr.Markdown("### About")
                    gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                    gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")

                    gr.Markdown("### Statistics")
                    gr.Markdown(f"Total Questions: {len(questions)}")
                    gr.Markdown(f"Source Finders: {len(source_finders)}")

                    gr.Markdown("### Source Finders")
                    for f in source_finders:
                        gr.Markdown(f"**{f['id']}**: {f['name']}")

            # Set up event handlers. Each dropdown gets exactly one change
            # listener; registering the same handler twice on one component
            # would run the query twice per change.
            for dropdown in (question_dropdown, source_finder_dropdown):
                dropdown.change(
                    update_source_runs,
                    inputs=[question_dropdown, source_finder_dropdown],
                    outputs=[results_table, result_text, download_button]
                )

        app.queue()
        app.launch()
    finally:
        # Release the pool opened above once the app stops or setup fails.
        # NOTE(review): close_pool is defined in data_access and is assumed to
        # be a coroutine function like get_pool; a plain function works too.
        closing = close_pool()
        if asyncio.iscoroutine(closing):
            await closing

# Script entry point: only when executed directly (not on import), run the
# async main() to completion with asyncio.run.
if __name__ == "__main__":
    asyncio.run(main())