eval_results / app.py
davidr70's picture
intial commit
a23bdc6
raw
history blame
8.8 kB
import asyncio
import tempfile
from typing import Optional

import gradio as gr
import pandas as pd

from data_access import get_pool, get_async_connection, close_pool
# Module-level caches. They start empty and are rebound by initialize_data().

# Raw rows loaded from the database.
questions: list = []
source_finders: list = []

# id -> text / id -> name lookups derived from the rows above.
questions_dict: dict = {}
source_finders_dict: dict = {}

# Choices and labels used to populate the UI dropdowns.
question_options: list = []
finder_options: list = []
finder_labels: dict = {"All": "All Source Finders"}
async def get_questions():
    """Load every row of the ``questions`` table, ordered by id.

    Returns:
        A list of dicts shaped ``{"id": <id>, "text": <question_text>}``.
    """
    sql = "SELECT id, question_text FROM questions ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        return [{"id": row["id"], "text": row["question_text"]} for row in rows]
async def get_source_finders():
    """Load every row of the ``source_finders`` table, ordered by id.

    The ``source_finder_type`` column is exposed under the key ``name``.

    Returns:
        A list of dicts shaped ``{"id": <id>, "name": <source_finder_type>}``.
    """
    sql = "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
    async with get_async_connection() as conn:
        rows = await conn.fetch(sql)
        return [{"id": row["id"], "name": row["name"]} for row in rows]
async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
    """Return the distinct run ids stored in ``source_runs`` for a question.

    Args:
        question_id: Question whose runs are listed.
        source_finder_id: When given (and not the ``"All"`` sentinel), only
            runs produced by that source finder are considered.

    Returns:
        The matching ``run_id`` values in ascending order.
    """
    sql_parts = ["SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"]
    args = [question_id]

    # Both None and the "All" sentinel mean "do not filter by finder".
    if source_finder_id is not None and source_finder_id != "All":
        sql_parts.append(" AND source_finder_id = $2")
        args.append(source_finder_id)

    sql_parts.append(" ORDER BY run_id")

    async with get_async_connection() as conn:
        rows = await conn.fetch("".join(sql_parts), *args)
        return [row["run_id"] for row in rows]
async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
                          run_id: Optional[int] = None):
    """Fetch the source-run rows for one question and one run.

    Each row is a ``source_runs`` record joined with its source finder, with
    the finder's ``source_finder_type`` exposed as ``finder_name``.

    Args:
        question_id: Question whose source runs are fetched.
        source_finder_id: Optional source finder to restrict the results to.
            ``None`` means no finder filter.
        run_id: Run to fetch. ``None`` selects run 1, which is the run the
            query was previously hard-coded to, so existing callers that omit
            this argument get the same rows as before.

    Returns:
        A list of plain dicts, one per row, ordered by ``run_id`` and then by
        ``rank`` descending.
    """
    # The query used to embed "sr.run_id = 1" and ignore the run_id argument.
    effective_run_id = 1 if run_id is None else run_id

    query = """
        SELECT sr.*, sf.source_finder_type as finder_name
        FROM source_runs sr
        JOIN source_finders sf ON sr.source_finder_id = sf.id
        WHERE sr.question_id = $1 AND sr.run_id = $2
    """
    params = [question_id, effective_run_id]

    # Compare against None explicitly so a falsy-but-valid id is not dropped.
    if source_finder_id is not None:
        params.append(source_finder_id)
        query += f" AND sr.source_finder_id = ${len(params)}"

    query += " ORDER BY sr.run_id, sr.rank DESC"

    async with get_async_connection() as conn:
        rows = await conn.fetch(query, *params)
        return [dict(row) for row in rows]
async def initialize_data():
    """Load questions and source finders and rebuild the module-level caches.

    Rebinds the module globals ``questions``, ``source_finders``, their
    id-keyed lookup dicts, and the dropdown option/label collections.
    """
    global questions, source_finders
    global questions_dict, source_finders_dict
    global question_options, finder_options, finder_labels

    questions = await get_questions()
    source_finders = await get_source_finders()

    # Lookup tables keyed by id.
    questions_dict = {item["id"]: item["text"] for item in questions}
    source_finders_dict = {item["id"]: item["name"] for item in source_finders}

    # Dropdown entries: questions are shown as "<id>: <text>", finders by
    # their stringified id with a separate id -> name label map.
    question_options = [f"{item['id']}: {item['text']}" for item in questions]
    finder_options = [str(item["id"]) for item in source_finders]
    finder_labels = {str(item["id"]): item["name"] for item in source_finders}
def update_source_runs(question_option, source_finder_id):
    """Gradio handler: load the source runs for the current dropdown selection.

    Args:
        question_option: Selected question in ``"<id>: <text>"`` form, or a
            falsy value when nothing is selected.
        source_finder_id: Selected source finder id as a string. ``None``,
            an empty value or ``"All"`` means "no finder filter".

    Returns:
        A 3-tuple ``(table, message, csv_path)`` matching the three outputs
        the handler is wired to (results table, result text, download
        button). ``table`` and ``csv_path`` are ``None`` when there is
        nothing to show.
    """
    # Every return must have exactly three values, one per wired output.
    if not question_option:
        return None, "No question selected", None

    # The question id is the part of the option before the first colon.
    question_id = int(question_option.split(":")[0])

    # An unselected dropdown yields None, so test truthiness rather than
    # calling len() on the value.
    if not source_finder_id or source_finder_id == "All":
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    # NOTE: run-id filtering is not wired up here; get_source_runs is called
    # without a run id and decides which run is returned.
    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))
    if not source_runs:
        return None, "No results found for the selected filters", None

    df = pd.DataFrame(source_runs)

    # Show only the curated columns when all of them are present; otherwise
    # fall back to the full frame.
    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
    df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df

    # The download button takes a file path rather than file contents, so the
    # full CSV is written to a temporary file. The file is intentionally not
    # deleted here because it must still exist when the download is served.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".csv",
        prefix="source_runs_",
        delete=False,
        newline="",
        encoding="utf-8",
    ) as csv_file:
        df.to_csv(csv_file, index=False)
        csv_path = csv_file.name

    result_message = f"Found {len(source_runs)} results"
    return df_display, result_message, csv_path
def update_run_ids(question_option, source_finder_id):
    """Gradio handler: list the run ids available for the current selection.

    Args:
        question_option: Selected question in ``"<id>: <text>"`` form, or a
            falsy value when nothing is selected.
        source_finder_id: Selected source finder id as a string. ``None``,
            an empty value or ``"All"`` means "no finder filter".

    Returns:
        A 4-tuple ``(run_id_options, None, message, None)``. ``run_id_options``
        is ``["All"]`` followed by the available run ids as strings, or an
        empty list when no question is selected.
    """
    if not question_option:
        return [], None, "No question selected", None

    # The question id is the part of the option before the first colon.
    question_id = int(question_option.split(":")[0])

    # An unselected dropdown yields None; int(None) would raise, so treat
    # every "nothing chosen" value the same as "All".
    if not source_finder_id or source_finder_id == "All":
        finder_id_int = None
    else:
        finder_id_int = int(source_finder_id)

    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]
    return run_id_options, None, "", None
# Create Gradio app
async def main():
    """Build and launch the Source Runs Explorer Gradio app.

    Opens the database pool, loads the dropdown data, builds the UI, wires
    the dropdown change events to ``update_source_runs`` and launches the
    app. The pool is closed again when the function exits, whether the app
    stopped normally or an error occurred.
    """
    await get_pool()
    try:
        await initialize_data()

        with gr.Blocks(title="Source Runs Explorer") as app:
            gr.Markdown("# Source Runs Explorer")
            with gr.Row():
                with gr.Column(scale=3):
                    # Main content area
                    question_dropdown = gr.Dropdown(
                        choices=question_options,
                        label="Select Question",
                        interactive=True,
                    )
                    with gr.Row():
                        # NOTE: only the source-finder filter lives in this
                        # row; a run-id dropdown is not wired up.
                        source_finder_dropdown = gr.Dropdown(
                            choices=finder_options,
                            label="Source Finder",
                            interactive=True,
                        )
                    result_text = gr.Markdown("Select a question to view source runs")
                    results_table = gr.DataFrame(
                        headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                        interactive=False,
                    )
                    download_button = gr.DownloadButton(
                        label="Download Results as CSV",
                        interactive=True,
                        visible=True,
                    )
                with gr.Column(scale=1):
                    # Sidebar area
                    gr.Markdown("### About")
                    gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                    gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")
                    gr.Markdown("### Statistics")
                    gr.Markdown(f"Total Questions: {len(questions)}")
                    gr.Markdown(f"Source Finders: {len(source_finders)}")
                    gr.Markdown("### Source Finders")
                    for f in source_finders:
                        gr.Markdown(f"**{f['id']}**: {f['name']}")

            # Set up event handlers. Each dropdown gets exactly one change
            # listener; registering the question listener twice would run
            # the database query twice per selection.
            for dropdown in (question_dropdown, source_finder_dropdown):
                dropdown.change(
                    update_source_runs,
                    inputs=[question_dropdown, source_finder_dropdown],
                    outputs=[results_table, result_text, download_button],
                )

        app.queue()
        app.launch()
    finally:
        # Ensure we clean up when done.
        # NOTE(review): close_pool's signature is not visible from this file;
        # it is assumed to take no arguments like get_pool. Its result is
        # awaited only if it is a coroutine, so a synchronous close_pool
        # works too.
        closing = close_pool()
        if asyncio.iscoroutine(closing):
            await closing
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    asyncio.run(main())