# NOTE: the page-status text "Spaces: Sleeping" was captured together with this file; it is not part of the program.
import asyncio | |
import gradio as gr | |
import pandas as pd | |
from data_access import get_questions, get_source_finders, get_run_ids, get_baseline_rankers, \ | |
get_unified_sources | |
# Module-level state shared by the UI callbacks.
# The question / source-finder / baseline-ranker entries start empty and are
# filled in by initialize_data().

# Raw records as returned by the data-access layer
questions = []
source_finders = []
baseline_rankers = []

# id -> text / name lookups
questions_dict = {}
source_finders_dict = {}
baseline_rankers_dict = {}

# Dropdown choices and their display labels
question_options = []
finder_options = []
finder_labels = {"All": "All Source Finders"}
baseline_ranker_options = []
baseline_ranker_labels = {}
run_ids = []

# Last run id seen by update_sources_list(); used there to skip repeated
# change events coming from the run-id dropdown.
previous_run_id = None

# The run-id dropdown component; assigned in main() when the UI is built.
run_id_dropdown = None
# Initialize data in a single async function
async def initialize_data():
    """Load the lookup data and publish it through the module-level globals.

    Awaits get_questions(), get_source_finders() and get_baseline_rankers()
    in that order, then derives the id lookups and the dropdown
    choices/labels from the returned records. Each record is read through
    its "id" key plus "text" (questions) or "name" (finders and rankers).
    """
    # Every name assigned below must be declared global; otherwise the
    # assignment only creates a local and the module-level value stays empty.
    global questions, source_finders, baseline_rankers
    global questions_dict, source_finders_dict, baseline_rankers_dict
    global question_options, finder_options, finder_labels
    global baseline_ranker_options, baseline_ranker_labels

    questions = await get_questions()
    source_finders = await get_source_finders()
    baseline_rankers = await get_baseline_rankers()

    # Convert to dictionaries for easier lookup
    questions_dict = {q["id"]: q["text"] for q in questions}
    source_finders_dict = {f["id"]: f["name"] for f in source_finders}
    baseline_rankers_dict = {r["id"]: r["name"] for r in baseline_rankers}

    # Create formatted options for dropdowns
    question_options = [f"{q['id']}: {q['text']}" for q in questions]
    finder_options = [str(f["id"]) for f in source_finders]
    finder_labels = {str(f["id"]): f["name"] for f in source_finders}
    baseline_ranker_options = [r["id"] for r in baseline_rankers]
    # Labels come from the baseline rankers themselves, not the source finders.
    baseline_ranker_labels = {str(r["id"]): r["name"] for r in baseline_rankers}
def update_sources_list(question_option, source_finder_id, baseline_ranker_id: str, run_id: str, evt: gr.EventData = None):
    """Synchronous event handler that delegates to update_sources_list_async().

    When the event comes from the component whose elem_id is
    "run_id_dropdown" and the run id equals the one remembered from the
    previous call, four no-op updates are returned and nothing is fetched.
    Otherwise the run id is remembered and the async implementation is run
    to completion; its result is returned unchanged.
    """
    global previous_run_id

    # A run-id event that carries the already-seen run id needs no refresh.
    if evt and evt.target and evt.target.elem_id == "run_id_dropdown" and run_id == previous_run_id:
        return tuple(gr.update() for _ in range(4))

    # Store the current run_id for future comparison
    previous_run_id = run_id
    pending = update_sources_list_async(question_option, source_finder_id, baseline_ranker_id, run_id)
    return asyncio.run(pending)
def _dropdown_text(value):
    """Collapse a dropdown value (None, scalar, or list/tuple) into a stripped string."""
    if isinstance(value, (list, tuple)):
        value = value[0] if value else None
    return "" if value is None else str(value).strip()


# Main function to handle UI interactions
async def update_sources_list_async(question_option, source_finder_id, baseline_ranker_id: str, run_id: str):
    """Fetch the unified sources for the current selection and format them for display.

    Args:
        question_option: Dropdown label of the form "<id>: <text>"; a falsy
            value means no question is selected.
        source_finder_id: Selected source finder id. None or an empty value
            means no finder is chosen, in which case nothing is fetched.
        baseline_ranker_id: Selected baseline ranker id; an empty value falls
            back to ranker 1.
        run_id: Requested run id. It is replaced by the first run id returned
            by get_run_ids() when it is not among them.

    Returns:
        A 4-tuple in handler-output order: results table, statistics,
        run-id dropdown update, status message.
    """
    if not question_option:
        return gr.update(), gr.update(), gr.update(), "No question selected"

    # Extract question ID from selection
    question_id = int(question_option.split(":")[0])

    available_run_ids = await get_run_ids(question_id)
    run_id_options = [str(r_id) for r_id in available_run_ids]
    if not run_id_options:
        # No runs for this question: there is nothing to fall back to.
        return None, None, gr.update(choices=[], value=None), "No results found for the selected filters"

    # Dropdowns may deliver None, a scalar or a list, so normalise before
    # applying int().
    run_id = _dropdown_text(run_id)
    if run_id not in run_id_options:
        run_id = run_id_options[0]
    run_id_int = int(run_id)

    finder_text = _dropdown_text(source_finder_id)
    finder_id_int = int(finder_text) if finder_text else None

    baseline_text = _dropdown_text(baseline_ranker_id)
    baseline_ranker_id_int = int(baseline_text) if baseline_text else 1

    # Get source runs data
    source_runs = None
    stats = None
    if finder_id_int is not None:
        source_runs, stats = await get_unified_sources(question_id, finder_id_int, run_id_int, baseline_ranker_id_int)

    # Returning a bare list would set the dropdown's value; an explicit update
    # refreshes the choices and selects the run id that was actually used.
    run_id_update = gr.update(choices=run_id_options, value=run_id)

    if not source_runs:
        return None, None, run_id_update, "No results found for the selected filters"

    # Create DataFrame for display
    df = pd.DataFrame(source_runs)

    # Format table columns; fall back to the full frame if any column is missing.
    columns_to_display = ['sugya_id', 'in_baseline', 'baseline_rank', 'in_source_run', 'source_run_rank', 'tractate',
                          'folio', 'reason']
    df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df

    result_message = f"Found {len(source_runs)} results"
    return df_display, stats, run_id_update, result_message
# Create Gradio app
async def main():
    """Load the lookup data, build the Gradio UI, wire the events and launch the app.

    Side effects: assigns the module-level run_id_dropdown and starts the
    Gradio server via app.launch().
    """
    global run_id_dropdown
    await initialize_data()

    # The handler applies len()/int() to the selected baseline ranker id, so
    # the ids are offered as strings and one is pre-selected.
    baseline_choices = [str(option) for option in baseline_ranker_options]
    default_baseline = baseline_choices[0] if baseline_choices else None

    with gr.Blocks(title="Source Runs Explorer") as app:
        gr.Markdown("# Source Runs Explorer")
        with gr.Row():
            with gr.Column(scale=3):
                # Main content area
                with gr.Row():
                    with gr.Column(scale=1):
                        question_dropdown = gr.Dropdown(
                            choices=question_options,
                            label="Select Question",
                            value=None,
                            interactive=True,
                            elem_id="question_dropdown"
                        )
                    with gr.Column(scale=1):
                        baseline_rankers_dropdown = gr.Dropdown(
                            choices=baseline_choices,
                            value=default_baseline,
                            label="Select Baseline Ranker",
                            interactive=True,
                            elem_id="baseline_rankers_dropdown"
                        )
                with gr.Row():
                    with gr.Column(scale=1):
                        source_finder_dropdown = gr.Dropdown(
                            choices=finder_options,
                            label="Source Finder",
                            interactive=True,
                            elem_id="source_finder_dropdown"
                        )
                    with gr.Column(scale=1):
                        run_id_dropdown = gr.Dropdown(
                            choices=run_ids,
                            value="1",
                            allow_custom_value=True,
                            label="Run id for Question and source finder",
                            interactive=True,
                            elem_id="run_id_dropdown"
                        )
                result_text = gr.Markdown("Select a question to view source runs")
                gr.Markdown("# Source Run Statistics")
                statistics_table = gr.DataFrame(
                    headers=["num_high_ranked_baseline_sources",
                             "num_high_ranked_found_sources",
                             "overlap_count",
                             "overlap_percentage",
                             "high_ranked_overlap_count",
                             "high_ranked_overlap_percentage"
                             ],
                    interactive=False,
                )
                gr.Markdown("# Sources Found")
                results_table = gr.DataFrame(
                    headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
                    interactive=False
                )
            with gr.Column(scale=1):
                # Sidebar area
                gr.Markdown("### About")
                gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
                gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")
                gr.Markdown("### Statistics")
                gr.Markdown(f"Total Questions: {len(questions)}")
                gr.Markdown(f"Source Finders: {len(source_finders)}")
                gr.Markdown("### Source Finders")
                for f in source_finders:
                    gr.Markdown(f"**{f['id']}**: {f['name']}")
                gr.Markdown("### Baseline Source Rankers")
                for f in baseline_rankers:
                    gr.Markdown(f"**{f['id']}**: {f['name']}")

        # Set up event handlers.
        # Inputs are passed positionally, so their order must follow the
        # handler signature: (question, source finder, baseline ranker, run id).
        handler_inputs = [question_dropdown, source_finder_dropdown, baseline_rankers_dropdown, run_id_dropdown]
        handler_outputs = [results_table, statistics_table, run_id_dropdown, result_text]
        # Every dropdown refreshes the tables, including the baseline ranker.
        for dropdown in (question_dropdown, source_finder_dropdown, baseline_rankers_dropdown, run_id_dropdown):
            dropdown.change(
                update_sources_list,
                inputs=handler_inputs,
                outputs=handler_outputs
            )

    app.queue()
    app.launch()
# Script entry point: drive the async main() to completion when run directly.
if __name__ == "__main__":
    asyncio.run(main())