Spaces:

davidr70
/

eval_results

Sleeping

App Files Files Community

eval_results / app.py

davidr70

initial commit

a23bdc6 about 1 month ago

raw

history blame

8.8 kB

	import asyncio
	from typing import Optional
	import gradio as gr
	import pandas as pd
	from data_access import get_pool, get_async_connection, close_pool

	# Module-level caches; every name below is reassigned by initialize_data()
	# before the UI is built, so these are only empty placeholders.
	# Initialize data at the module level
	questions = []  # dicts with "id" and "text" keys, as returned by get_questions()
	source_finders = []  # dicts with "id" and "name" keys, as returned by get_source_finders()
	questions_dict = {}  # question id -> question text
	source_finders_dict = {}  # source finder id -> source finder name
	question_options = []  # "<id>: <text>" labels used as the question dropdown choices
	finder_options = []  # source finder ids as strings, used as the finder dropdown choices
	finder_labels = {"All": "All Source Finders"}  # replaced wholesale by initialize_data()


	# Get all questions
	async def get_questions():
	    """Return every row of ``questions`` (ordered by id) as ``{"id", "text"}`` dicts."""
	    sql = "SELECT id, question_text FROM questions ORDER BY id"
	    async with get_async_connection() as conn:
	        rows = await conn.fetch(sql)
	        result = []
	        for row in rows:
	            # Expose the question_text column under the shorter "text" key.
	            result.append({"id": row["id"], "text": row["question_text"]})
	        return result


	# Get distinct source finders
	async def get_source_finders():
	    """Return every row of ``source_finders`` (ordered by id) as ``{"id", "name"}`` dicts.

	    The ``source_finder_type`` column is aliased to ``name`` in the query itself.
	    """
	    sql = "SELECT id, source_finder_type as name FROM source_finders ORDER BY id"
	    async with get_async_connection() as conn:
	        rows = await conn.fetch(sql)
	        result = []
	        for row in rows:
	            result.append({"id": row["id"], "name": row["name"]})
	        return result


	# Get distinct run IDs for a question
	async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
	    """Return the distinct ``run_id`` values recorded for a question, in ascending order.

	    Args:
	        question_id: Value matched against ``source_runs.question_id``.
	        source_finder_id: Optional finder id to filter on; ``None`` or the
	            string ``"All"`` leaves the finder unfiltered.

	    Returns:
	        A list holding the ``run_id`` of each matching row.
	    """
	    async with get_async_connection() as conn:
	        sql_parts = ["SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"]
	        args = [question_id]

	        unfiltered = source_finder_id is None or source_finder_id == "All"
	        if not unfiltered:
	            sql_parts.append(" AND source_finder_id = $2")
	            args.append(source_finder_id)

	        sql_parts.append(" ORDER BY run_id")

	        rows = await conn.fetch("".join(sql_parts), *args)
	        return [row["run_id"] for row in rows]


	# Get source runs for a specific question with filters
	async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
	                          run_id: Optional[int] = None):
	    """Fetch the source runs of one question, joined with their finder name.

	    Args:
	        question_id: Value matched against ``source_runs.question_id``.
	        source_finder_id: Optional finder id to filter on; ``None`` means all
	            finders. Compared with ``is not None`` so an id of 0 still filters.
	        run_id: Run to return. When omitted, run 1 is used, which is what the
	            query previously hard-coded regardless of this argument.

	    Returns:
	        A list of plain dicts (one per row) containing every ``source_runs``
	        column plus ``finder_name``, ordered by run id and then descending rank.
	    """
	    # Keep the long-standing default of showing only the first run, while
	    # letting callers that do pass a run_id actually get that run.
	    effective_run_id = 1 if run_id is None else run_id

	    query = """
	    SELECT sr.*, sf.source_finder_type as finder_name
	    FROM source_runs sr
	    JOIN source_finders sf ON sr.source_finder_id = sf.id
	    WHERE sr.question_id = $1 and sr.run_id = $2
	    """
	    params = [question_id, effective_run_id]

	    if source_finder_id is not None:
	        params.append(source_finder_id)
	        # The placeholder index is the position of the value just appended.
	        query += f" AND sr.source_finder_id = ${len(params)}"

	    query += " ORDER BY sr.run_id, sr.rank DESC"

	    async with get_async_connection() as conn:
	        sources = await conn.fetch(query, *params)
	        return [dict(s) for s in sources]


	# Initialize data in a single async function
	async def initialize_data():
	    """Load questions and source finders and rebuild the module-level lookup caches.

	    Reassigns the globals ``questions``, ``source_finders``, ``questions_dict``,
	    ``source_finders_dict``, ``question_options``, ``finder_options`` and
	    ``finder_labels`` from the rows returned by ``get_questions()`` and
	    ``get_source_finders()``.
	    """
	    global questions, source_finders, questions_dict, source_finders_dict, question_options, finder_options, finder_labels

	    questions = await get_questions()
	    source_finders = await get_source_finders()

	    # id -> text / id -> name maps for direct lookup
	    questions_dict = dict((entry["id"], entry["text"]) for entry in questions)
	    source_finders_dict = dict((entry["id"], entry["name"]) for entry in source_finders)

	    # Dropdown choices: "<id>: <text>" for questions, the stringified id for finders
	    question_options = []
	    for entry in questions:
	        question_options.append(f"{entry['id']}: {entry['text']}")

	    finder_options = []
	    finder_labels = {}
	    for entry in source_finders:
	        finder_key = str(entry["id"])
	        finder_options.append(finder_key)
	        finder_labels[finder_key] = entry["name"]


	# Main function to handle UI interactions
	def update_source_runs(question_option, source_finder_id):
	    """Query the source runs for the current selection and format them for the UI.

	    Every return path yields exactly three values, matching the three output
	    components this handler is wired to (results table, status text,
	    download button).

	    Args:
	        question_option: Selected dropdown label of the form ``"<id>: <text>"``;
	            falsy when no question is selected.
	        source_finder_id: Selected source finder id as a string. ``None``,
	            ``""`` and ``"All"`` all mean "do not filter by finder".

	    Returns:
	        ``(table, message, csv_path)`` where ``table`` is a DataFrame (or
	        ``None``), ``message`` is the status text, and ``csv_path`` is the path
	        of a temporary CSV file holding the full result set (or ``None``).
	    """
	    import tempfile

	    if not question_option:
	        return None, "No question selected", None

	    # Extract question ID from the "<id>: <text>" label
	    question_id = int(question_option.split(":")[0])

	    # An unselected dropdown delivers None, so test for "no filter" before int()
	    if source_finder_id in (None, "", "All"):
	        finder_id_int = None
	    else:
	        finder_id_int = int(source_finder_id)

	    # The handler is synchronous, so the coroutine is driven with asyncio.run
	    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))

	    if not source_runs:
	        return None, "No results found for the selected filters", None

	    df = pd.DataFrame(source_runs)

	    # Show the curated column subset only when all of those columns are present;
	    # otherwise fall back to showing everything that came back.
	    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
	    if set(columns_to_display).issubset(df.columns):
	        df_display = df[columns_to_display]
	    else:
	        df_display = df

	    # A download button takes a file path rather than file contents, so the
	    # CSV is written to a temporary file. delete=False keeps it on disk after
	    # the handle closes so it can still be served.
	    with tempfile.NamedTemporaryFile(
	        "w", suffix=".csv", prefix="source_runs_", delete=False,
	        encoding="utf-8", newline="",
	    ) as csv_file:
	        df.to_csv(csv_file, index=False)

	    result_message = f"Found {len(source_runs)} results"

	    return df_display, result_message, csv_file.name


	# Function to update run_id dropdown when question or source_finder changes
	def update_run_ids(question_option, source_finder_id):
	    """Compute the run-id dropdown choices for the current question and finder.

	    Args:
	        question_option: Selected dropdown label of the form ``"<id>: <text>"``;
	            falsy when no question is selected.
	        source_finder_id: Selected source finder id as a string. ``None``,
	            ``""`` and ``"All"`` all mean "do not filter by finder".

	    Returns:
	        A 4-tuple ``(run_id_options, None, message, None)``. ``run_id_options``
	        is ``["All"]`` followed by the available run ids as strings, or an
	        empty list when no question is selected.
	    """
	    if not question_option:
	        return [], None, "No question selected", None

	    # Extract question ID from the "<id>: <text>" label
	    question_id = int(question_option.split(":")[0])

	    # An unselected dropdown delivers None (and a cleared one may deliver ""),
	    # neither of which int() accepts, so treat them like "All".
	    if source_finder_id in (None, "", "All"):
	        finder_id_int = None
	    else:
	        finder_id_int = int(source_finder_id)

	    # Get available run IDs
	    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
	    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]

	    return run_id_options, None, "", None



	# Create the Gradio app, serve it, and clean up the pool when done
	async def main():
	    """Load the lookup data, build the Source Runs Explorer UI and launch it.

	    Awaits ``get_pool()`` and ``initialize_data()`` first so the dropdown
	    choices and sidebar statistics are available while the layout is built.
	    ``close_pool()`` is always called on the way out, including when building
	    or launching the app raises.
	    """
	    import inspect

	    await get_pool()
	    try:
	        await initialize_data()
	        with gr.Blocks(title="Source Runs Explorer") as app:
	            gr.Markdown("# Source Runs Explorer")

	            with gr.Row():
	                with gr.Column(scale=3):
	                    # Main content area
	                    question_dropdown = gr.Dropdown(
	                        choices=question_options,
	                        label="Select Question",
	                        interactive=True
	                    )

	                    with gr.Row():
	                        source_finder_dropdown = gr.Dropdown(
	                            choices=finder_options,
	                            label="Source Finder",
	                            interactive=True
	                        )

	                    # NOTE: there is no run-id dropdown; filtering by run id is
	                    # not exposed in the UI.

	                    result_text = gr.Markdown("Select a question to view source runs")

	                    results_table = gr.DataFrame(
	                        headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
	                        interactive=False
	                    )

	                    download_button = gr.DownloadButton(
	                        label="Download Results as CSV",
	                        interactive=True,
	                        visible=True
	                    )

	                with gr.Column(scale=1):
	                    # Sidebar area
	                    gr.Markdown("### About")
	                    gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
	                    gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")

	                    gr.Markdown("### Statistics")
	                    gr.Markdown(f"Total Questions: {len(questions)}")
	                    gr.Markdown(f"Source Finders: {len(source_finders)}")

	                    gr.Markdown("### Source Finders")
	                    for f in source_finders:
	                        gr.Markdown(f"{f['id']}: {f['name']}")

	            # Set up event handlers: both dropdowns refresh the same three
	            # outputs. Each listener is registered exactly once so a change
	            # triggers a single query.
	            for dropdown in (question_dropdown, source_finder_dropdown):
	                dropdown.change(
	                    update_source_runs,
	                    inputs=[question_dropdown, source_finder_dropdown],
	                    outputs=[results_table, result_text, download_button]
	                )

	        app.queue()
	        app.launch()
	    finally:
	        # close_pool is imported from data_access and its signature is not
	        # visible here, so await its result only if it is awaitable.
	        closing = close_pool()
	        if inspect.isawaitable(closing):
	            await closing

	# Script entry point: drive the async main() to completion when run directly.
	if __name__ == "__main__":
	    asyncio.run(main())