Spaces:

davidr70
/

eval_results

Running

App Files Files Community

davidr70 committed 16 days ago

Commit

a23bdc6

1 Parent(s): 3ed14b6

initial commit

Browse files

Files changed (4) hide show

.gitignore +3 -0
app.py +245 -0
data_access.py +55 -0
requirements.txt +3 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.env
+.idea/*
+*.iml

app.py ADDED Viewed

	@@ -0,0 +1,245 @@

+import asyncio
+from typing import Optional
+import gradio as gr
+import pandas as pd
+from data_access import get_pool, get_async_connection, close_pool
+# Module-level caches. They start empty and are (re)assigned by initialize_data().
+questions = []  # list of {"id", "text"} dicts, as produced by get_questions()
+source_finders = []  # list of {"id", "name"} dicts, as produced by get_source_finders()
+questions_dict = {}  # question id -> question text
+source_finders_dict = {}  # source finder id -> source finder name
+question_options = []  # "<id>: <text>" strings used as question dropdown choices
+finder_options = []  # source finder ids as strings, used as finder dropdown choices
+finder_labels = {"All": "All Source Finders"}  # replaced wholesale by initialize_data()
+# Get all questions
+async def get_questions():
+    async with get_async_connection() as conn:
+        questions = await conn.fetch("SELECT id, question_text FROM questions ORDER BY id")
+        return [{"id": q["id"], "text": q["question_text"]} for q in questions]
+# Get distinct source finders
+async def get_source_finders():
+    async with get_async_connection() as conn:
+        finders = await conn.fetch("SELECT id, source_finder_type as name FROM source_finders ORDER BY id")
+        return [{"id": f["id"], "name": f["name"]} for f in finders]
+# Get distinct run IDs for a question
+async def get_run_ids(question_id: int, source_finder_id: Optional[int] = None):
+    async with get_async_connection() as conn:
+        query = "SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1"
+        params = [question_id]
+        if source_finder_id != "All" and source_finder_id is not None:
+            query += " AND source_finder_id = $2"
+            params.append(source_finder_id)
+        query += " ORDER BY run_id"
+        run_ids = await conn.fetch(query, *params)
+        return [r["run_id"] for r in run_ids]
+# Get source runs for a specific question with filters
+async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
+                          run_id: Optional[int] = None):
+    async with get_async_connection() as conn:
+        # Build query with filters
+        query = """
+            SELECT sr.*, sf.source_finder_type as finder_name
+            FROM source_runs sr
+            JOIN source_finders sf ON sr.source_finder_id = sf.id
+            WHERE sr.question_id = $1 and sr.run_id = 1
+        """
+        params = [question_id]
+        param_counter = 2
+        if source_finder_id:
+            query += f" AND sr.source_finder_id = ${param_counter}"
+            params.append(source_finder_id)
+            param_counter += 1
+        # if run_id:
+        #     query += f" AND sr.run_id = ${param_counter}"
+        #     params.append(run_id)
+        #     param_counter += 1
+        query += " ORDER BY sr.run_id, sr.rank DESC"
+        sources = await conn.fetch(query, *params)
+        return [dict(s) for s in sources]
+# Initialize data in a single async function
+async def initialize_data():
+    global questions, source_finders, questions_dict, source_finders_dict, question_options, finder_options, finder_labels
+    questions = await get_questions()
+    source_finders = await get_source_finders()
+    # Convert to dictionaries for easier lookup
+    questions_dict = {q["id"]: q["text"] for q in questions}
+    source_finders_dict = {f["id"]: f["name"] for f in source_finders}
+    # Create formatted options for dropdowns
+    question_options = [f"{q['id']}: {q['text']}" for q in questions]
+    finder_options = [str(f["id"]) for f in source_finders]
+    finder_labels = {str(f["id"]): f["name"] for f in source_finders}
+# Main function to handle UI interactions
+def update_source_runs(question_option, source_finder_id):
+    if not question_option:
+        return None, [], "No question selected", None
+    # Extract question ID from selection
+    question_id = int(question_option.split(":")[0])
+    # Get run_ids for filtering - use asyncio.run for each independent operation
+    # available_run_ids = asyncio.run(get_run_ids(question_id, source_finder_id))
+    # run_id_options = [str(r_id) for r_id in available_run_ids]
+    # If the selected run_id is not in available options, reset it
+    # if run_id not in run_id_options:
+    #     run_id = None
+    #
+    # # Convert run_id to int if not "All"
+    # run_id_int = None if len(run_id) == 0 else int(run_id)
+    finder_id_int = None if len(source_finder_id) == 0 else int(source_finder_id)
+    # Get source runs data
+    source_runs = asyncio.run(get_source_runs(question_id, finder_id_int))
+    if not source_runs:
+        return None, None, "No results found for the selected filters", None
+    # Create DataFrame for display
+    df = pd.DataFrame(source_runs)
+    # Format table columns
+    columns_to_display = ['finder_name', 'run_id', 'sugya_id', 'tractate', 'folio', 'rank', 'reason']
+    df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df
+    # CSV for download
+    csv_data = df.to_csv(index=False)
+    result_message = f"Found {len(source_runs)} results"
+    return df_display, result_message, csv_data
+# Function to update run_id dropdown when question or source_finder changes
+def update_run_ids(question_option, source_finder_id):
+    if not question_option:
+        return [], None, "No question selected", None
+    # Extract question ID
+    question_id = int(question_option.split(":")[0])
+    # Convert source_finder_id if not "All"
+    finder_id_int = None if source_finder_id == "All" else int(source_finder_id)
+    # Get available run IDs
+    available_run_ids = asyncio.run(get_run_ids(question_id, finder_id_int))
+    run_id_options = ["All"] + [str(run_id) for run_id in available_run_ids]
+    return run_id_options, None, "", None
+# Create Gradio app
+# Ensure we clean up when done
+async def main():
+    await get_pool()
+    await initialize_data()
+    with gr.Blocks(title="Source Runs Explorer") as app:
+        gr.Markdown("# Source Runs Explorer")
+        with gr.Row():
+            with gr.Column(scale=3):
+                # Main content area
+                question_dropdown = gr.Dropdown(
+                    choices=question_options,
+                    label="Select Question",
+                    interactive=True
+                )
+                with gr.Row():
+                    source_finder_dropdown = gr.Dropdown(
+                        choices=finder_options,
+                        label="Source Finder",
+                        interactive=True
+                    )
+                    # run_id_dropdown = gr.Dropdown(
+                    #     choices=[],
+                    #     value="",
+                    #     label="Run ID",
+                    #     interactive=True
+                    # )
+                result_text = gr.Markdown("Select a question to view source runs")
+                results_table = gr.DataFrame(
+                    headers=['Source Finder', 'Run ID', 'Sugya ID', 'Tractate', 'Folio', 'Rank', 'Reason'],
+                    interactive=False
+                )
+                download_button = gr.DownloadButton(
+                    label="Download Results as CSV",
+                    interactive=True,
+                    visible=True
+                )
+            with gr.Column(scale=1):
+                # Sidebar area
+                gr.Markdown("### About")
+                gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
+                gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")
+                gr.Markdown("### Statistics")
+                gr.Markdown(f"Total Questions: {len(questions)}")
+                gr.Markdown(f"Source Finders: {len(source_finders)}")
+                gr.Markdown("### Source Finders")
+                for f in source_finders:
+                    gr.Markdown(f"**{f['id']}**: {f['name']}")
+        # Set up event handlers
+        question_dropdown.change(
+            update_source_runs,
+            inputs=[question_dropdown, source_finder_dropdown],
+            # outputs=[run_id_dropdown, results_table, result_text, download_button]
+            outputs=[results_table, result_text, download_button]
+        )
+        source_finder_dropdown.change(
+            update_source_runs,
+            inputs=[question_dropdown, source_finder_dropdown],
+            # outputs=[run_id_dropdown, results_table, result_text, download_button]
+            outputs=[results_table, result_text, download_button]
+        )
+        # run_id_dropdown.change(
+        #     update_source_runs,
+        #     inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown],
+        #     outputs=[results_table, run_id_dropdown, result_text, download_button]
+        # )
+        # Initial load of data when question is selected
+        question_dropdown.change(
+            update_source_runs,
+            inputs=[question_dropdown, source_finder_dropdown],
+            outputs=[results_table, result_text, download_button]
+        )
+    app.queue()
+    app.launch()
+# Script entry point: run the async main() to completion when executed directly.
+if __name__ == "__main__":
+    asyncio.run(main())

data_access.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import asyncio
+import os
+from contextlib import asynccontextmanager
+import asyncpg
+from dotenv import load_dotenv
+# Global connection pool: created by get_pool() and reset to None by close_pool()
+_pool = None
+# Runs at import time, before get_pool() reads the pg_* settings via os.getenv()
+load_dotenv()
+async def get_pool(schema="talmudexplore", min_size=2, max_size=5):
+    """Initialize and return the connection pool with the specified schema."""
+    global _pool
+    if _pool is not None:
+        current_loop = asyncio.get_running_loop()
+        if getattr(_pool, '_loop', None) != current_loop:
+            try:
+                await _pool.close()
+            except:
+                pass
+            _pool = None
+    if _pool is None:
+        _pool = await asyncpg.create_pool(
+            database=os.getenv("pg_dbname"),
+            user=os.getenv("pg_user"),
+            password=os.getenv("pg_password"),
+            host=os.getenv("pg_host"),
+            port=os.getenv("pg_port"),
+            min_size=min_size,
+            max_size=max_size,
+            setup=lambda conn: conn.execute(f'SET search_path TO {schema}')
+        )
+    return _pool
+@asynccontextmanager
+async def get_async_connection():
+    """Get a connection from the pool as an async context manager."""
+    pool = await get_pool()
+    conn = await pool.acquire()
+    try:
+        yield conn
+    finally:
+        await pool.release(conn)
+async def close_pool():
+    """Close the connection pool."""
+    global _pool
+    if _pool:
+        await _pool.close()
+        _pool = None

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+asyncpg
+gradio
+python-dotenv