Commit: changes for new version

Files changed:
- app.py (+85, -74)
- data_access.py (+32, -13)
- requirements.txt (+1, -1)
- tests/test_db_layer.py (+14, -5)
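In outline: app.py reworks the selection flow so the user first picks a source finder, a run ID, and a baseline ranker, and the question dropdown is then filled from the overlap between the chosen run and the baseline; data_access.py adds an auto_commit flag to the connection helper, restricts get_questions to questions shared by two source finder runs, and passes explicit question_ids into the cumulative-statistics calculation; requirements.txt swaps psycopg2 for psycopg2-binary; tests/test_db_layer.py covers the new get_questions signature.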
app.py (CHANGED)

@@ -33,45 +33,60 @@ run_id_dropdown = None
 
 # Initialize data in a single async function
 async def initialize_data():
-    global …
+    global source_finders, source_finders_dict, finder_options, baseline_rankers_dict, baseline_ranker_options
     async with get_async_connection() as conn:
-        # Get questions and source finders
-        questions = await get_questions(conn)
         source_finders = await get_source_finders(conn)
         baseline_rankers = await get_baseline_rankers(conn)
 
         # Convert to dictionaries for easier lookup
-        questions_dict = {q["text"]: q["id"] for q in questions}
         baseline_rankers_dict = {f["name"]: f["id"] for f in baseline_rankers}
         source_finders_dict = {f["name"]: f["id"] for f in source_finders}
 
         # Create formatted options for dropdowns
-        question_options = [q['text'] for q in questions]
         finder_options = [s["name"] for s in source_finders]
         baseline_ranker_options = [b["name"] for b in baseline_rankers]
-        await update_run_ids_async(ALL_QUESTIONS_STR, list(source_finders_dict.keys())[0])
 
 
-def update_run_ids(question_option, source_finder_name):
-    return asyncio.run(update_run_ids_async(question_option, source_finder_name))
+def update_run_ids(question_option, source_finder_name, baseline_ranker_name):
+    return asyncio.run(update_run_ids_async(question_option, source_finder_name, baseline_ranker_name))
 
 
-async def update_run_ids_async(question_option, source_finder_name):
-    global previous_run_id, available_run_id_dict, run_id_options
+async def update_run_ids_async(question_option, source_finder_name, baseline_ranker_name):
+    global question_options, questions_dict, previous_run_id, available_run_id_dict, run_id_options
     async with get_async_connection() as conn:
         finder_id_int = source_finders_dict.get(source_finder_name)
-        …
         available_run_id_dict = await get_run_ids(conn, finder_id_int)
-        …
+        run_id_options = list(available_run_id_dict.keys())
+        return gr.Dropdown(choices=[]), None, None, gr.Dropdown(choices=run_id_options,
+                                                                value=None), "Select Question to see results", ""
+
+
+def update_questions_list(source_finder_name, run_id, baseline_ranker_name):
+    return asyncio.run(update_questions_list_async(source_finder_name, run_id, baseline_ranker_name))
+
+
+async def update_questions_list_async(source_finder_name, run_id, baseline_ranker_name):
+    global available_run_id_dict
+    if source_finder_name and run_id and baseline_ranker_name:
+        async with get_async_connection() as conn:
+            run_id_int = available_run_id_dict.get(run_id)
+            baseline_ranker_id = baseline_rankers_dict.get(baseline_ranker_name)
+            questions = await get_updated_question_list(conn, baseline_ranker_id, run_id_int)
+            return gr.Dropdown(choices=questions, value=None), None, None, None, None
+    else:
+        return None, None, None, None, None
+
+
+async def get_updated_question_list(conn, baseline_ranker_id, finder_id_int):
+    global questions_dict, questions
+    questions = await get_questions(conn, finder_id_int, baseline_ranker_id)
+    if questions:
+        questions_dict = {q["text"]: q["id"] for q in questions}
+        question_options = [ALL_QUESTIONS_STR] + [q['text'] for q in questions]
+    else:
+        question_options = []
+    return question_options
 
 
 def update_sources_list(question_option, source_finder_id, run_id: str, baseline_ranker_id: str,
                         evt: gr.EventData = None):
@@ -88,9 +103,11 @@ def update_sources_list(question_option, source_finder_id, run_id: str, baseline
 
 # Main function to handle UI interactions
 async def update_sources_list_async(question_option, source_finder_name, run_id, baseline_ranker_name: str):
-    global available_run_id_dict, previous_run_id
+    global available_run_id_dict, previous_run_id, questions_dict
     if not question_option:
-        return gr.skip(), gr.skip(),
+        return gr.skip(), gr.skip(), "No question selected", ""
+    if not source_finder_name or not run_id or not baseline_ranker_name:
+        return gr.skip(), gr.skip(), "Need to select source finder and baseline", ""
     logger.info("processing update")
     async with get_async_connection() as conn:
         if type(baseline_ranker_name) == list:
@@ -106,28 +123,18 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
 
         if question_option == ALL_QUESTIONS_STR:
             if finder_id_int:
-                if run_id is None:
-                    available_run_id_dict = await get_run_ids(conn, finder_id_int)
-                    run_id = list(available_run_id_dict.keys())[0]
-                    previous_run_id = run_id
                 run_id_int = available_run_id_dict.get(run_id)
-                all_stats = await calculate_cumulative_statistics_for_all_questions(conn, run_id_int,
+                all_stats = await calculate_cumulative_statistics_for_all_questions(conn, list(questions_dict.values()),
+                                                                                    run_id_int,
                                                                                     baseline_ranker_id_int)
-
             else:
-                run_id_options = list(available_run_id_dict.keys())
                 all_stats = None
-
-            return None, all_stats, gr.Dropdown(choices=run_id_options,
-                                                value=run_id), "Select Run Id and source finder to see results", ""
+            return None, all_stats, "Select Run Id and source finder to see results", ""
 
         # Extract question ID from selection
         question_id = questions_dict.get(question_option)
 
         available_run_id_dict = await get_run_ids(conn, finder_id_int, question_id)
-        run_id_options = list(available_run_id_dict.keys())
-        if run_id not in run_id_options:
-            run_id = run_id_options[0]
         previous_run_id = run_id
         run_id_int = available_run_id_dict.get(run_id)
 
@@ -140,7 +147,7 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
         df = pd.DataFrame(source_runs)
 
         if not source_runs:
-            return None, None,
+            return None, None, "No results found for the selected filters",
 
         # Format table columns
         columns_to_display = ['sugya_id', 'in_baseline', 'baseline_rank', 'in_source_run', 'source_run_rank',
@@ -152,8 +159,8 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
         # csv_data = df.to_csv(index=False)
         metadata = await get_metadata(conn, question_id, run_id_int)
 
         result_message = f"Found {len(source_runs)} results"
-        return df_display, stats,
+        return df_display, stats, result_message, metadata
 
 
 # Add a new function to handle row selection
@@ -189,46 +196,50 @@ async def main():
         with gr.Column(scale=3):
             with gr.Row():
                 with gr.Column(scale=1):
-                    question_dropdown = gr.Dropdown(
-                        choices=[ALL_QUESTIONS_STR] + question_options,
-                        label="Select Question",
+                    source_finder_dropdown = gr.Dropdown(
+                        choices=finder_options,
                         value=None,
+                        label="Source Finder",
                         interactive=True,
-                        elem_id="question_dropdown"
+                        elem_id="source_finder_dropdown"
+                    )
+                with gr.Column(scale=1):
+                    run_id_dropdown = gr.Dropdown(
+                        choices=run_id_options,
+                        value=None,
+                        allow_custom_value=True,
+                        label="source finder Run ID",
+                        interactive=True,
+                        elem_id="run_id_dropdown"
                     )
                 with gr.Column(scale=1):
                     baseline_rankers_dropdown = gr.Dropdown(
                        choices=baseline_ranker_options,
+                        value=None,
                        label="Select Baseline Ranker",
                        interactive=True,
                        elem_id="baseline_rankers_dropdown"
                    )
-
            with gr.Row():
                with gr.Column(scale=1):
-                    source_finder_dropdown = gr.Dropdown(
-                        …
-                    )
-                with gr.Column(scale=1):
-                    run_id_dropdown = gr.Dropdown(
-                        choices=run_id_options,
-                        allow_custom_value=True,
-                        label="Run id for Question and source finder",
-                        interactive=True,
-                        elem_id="run_id_dropdown"
+                    # Main content area
+                    question_dropdown = gr.Dropdown(
+                        choices=[ALL_QUESTIONS_STR] + question_options,
+                        label="Select Question (if list is empty this means there is no overlap between source run and baseline)",
+                        value=None,
+                        interactive=True,
+                        elem_id="question_dropdown"
                    )
+
                with gr.Column(scale=1):
                    # Sidebar area
-                    gr.Markdown("…")
+                    gr.Markdown("""To get started, select the following:
+                    * Source Finder
+                    * Source Finder Run ID (corresponds to a run of the source finder for a group of questions)
+                    * Baseline Ranker (corresponds to a run of the baseline ranker for a group of questions)
+
+                    **Note: if there is no overlap between the baseline questions and the source finder questions, the question list will be empty.**
+                    """)
 
            with gr.Row():
                result_text = gr.Markdown("Select a question to view source runs")
@@ -283,29 +294,29 @@ async def main():
        )
 
        baseline_rankers_dropdown.change(
-            …
-            inputs=[…],
-            outputs=[…]
+            update_questions_list,
+            inputs=[source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
+            outputs=[question_dropdown, result_text, metadata_text]
+
+        )
+
+        run_id_dropdown.change(
+            update_questions_list,
+            inputs=[source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
+            outputs=[question_dropdown, result_text, metadata_text, results_table, statistics_table]
        )
 
        question_dropdown.change(
            update_sources_list,
            inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
-            outputs=[results_table, statistics_table, …]
+            outputs=[results_table, statistics_table, result_text, metadata_text]
        )
 
        source_finder_dropdown.change(
            update_run_ids,
-            inputs=[question_dropdown, source_finder_dropdown],
+            inputs=[question_dropdown, source_finder_dropdown, baseline_rankers_dropdown],
            # outputs=[run_id_dropdown, results_table, result_text, download_button]
-            outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
-        )
-
-        run_id_dropdown.change(
-            update_sources_list,
-            inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
-            outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
+            outputs=[question_dropdown, results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
        )
 
        app.queue()
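For readers unfamiliar with the pattern this commit leans on: in Gradio, a `.change` handler can repopulate a downstream dropdown by returning a `gr.Dropdown(...)` with new `choices`, which is how the source-finder → run-ID → baseline → question cascade above is wired. A minimal, self-contained sketch of that pattern (the `RUNS` data and component names here are invented for illustration; the real app loads everything from Postgres):

```python
import gradio as gr

# Invented stand-in data; the real app queries finders and run IDs from the database.
RUNS = {"finder-a": ["run-1", "run-2"], "finder-b": ["run-3"]}

def on_finder_change(finder_name):
    # Returning a gr.Dropdown from a .change handler replaces the target
    # component's choices and clears its value, driving the cascade.
    return gr.Dropdown(choices=RUNS.get(finder_name, []), value=None)

with gr.Blocks() as demo:
    finder = gr.Dropdown(choices=list(RUNS), label="Source Finder", interactive=True)
    run_id = gr.Dropdown(choices=[], label="Run ID", interactive=True)
    finder.change(on_finder_change, inputs=[finder], outputs=[run_id])

if __name__ == "__main__":
    demo.launch()
```

The same mechanism explains the `asyncio.run(...)` wrappers in the diff: Gradio calls the synchronous wrapper, which drives the async database work to completion before the dropdown update is returned.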
data_access.py (CHANGED)

@@ -14,9 +14,17 @@ load_dotenv()
 
 
 @asynccontextmanager
-async def get_async_connection(schema="talmudexplore"):
-    """…"""
+async def get_async_connection(schema="talmudexplore", auto_commit=True):
+    """
+    Get a connection for the current request.
+
+    Args:
+        schema: Database schema to use
+        auto_commit: If True (default), each statement auto-commits.
+                     If False, requires explicit commit.
+    """
     conn = None
+    tx = None
     try:
         # Create a single connection without relying on a shared pool
         conn = await asyncpg.connect(
@@ -27,14 +35,27 @@ async def get_async_connection(schema="talmudexplore"):
             port=os.getenv("pg_port")
         )
         await conn.execute(f'SET search_path TO {schema}')
+
+        if not auto_commit:
+            # Start a transaction that requires explicit commit
+            tx = conn.transaction()
+            await tx.start()
         yield conn
+        if not auto_commit and tx:
+            await tx.commit()
     finally:
         if conn:
             await conn.close()
 
 
-async def get_questions(conn: asyncpg.Connection):
-    questions = await conn.fetch("…")
+async def get_questions(conn: asyncpg.Connection, source_finder_run_id: int, baseline_source_finder_run_id: int):
+    questions = await conn.fetch("""
+        select distinct q.id, question_text from talmudexplore.questions q
+        join (select question_id from talmudexplore.source_finder_run_question_metadata where source_finder_run_id = $1) sfrqm1
+            on sfrqm1.question_id = q.id
+        join (select question_id from talmudexplore.source_finder_run_question_metadata where source_finder_run_id = $2) sfrqm2
+            on sfrqm2.question_id = q.id;
+        """, source_finder_run_id, baseline_source_finder_run_id)
     return [{"id": q["id"], "text": q["question_text"]} for q in questions]
 
 @cached(cache=TTLCache(ttl=1800, maxsize=1024))
@@ -96,7 +117,7 @@ async def get_baseline_rankers(conn: asyncpg.Connection):
                 FROM source_run_results srr
                 WHERE srr.source_finder_run_id = sfr.id
             )
-            ORDER BY sf.id
+            ORDER BY sf.id DESC
         """
 
     rankers = await conn.fetch(query)
@@ -131,26 +152,24 @@ async def calculate_baseline_vs_source_stats_for_question(conn: asyncpg.Connecti
         "high_ranked_overlap_count": len(high_ranked_overlap),
         "high_ranked_overlap_percentage": round(len(high_ranked_overlap) * 100 / max(len(actual_high_ranked), len(baseline_high_ranked)), 2) if max(len(actual_high_ranked), len(baseline_high_ranked)) > 0 else 0
     }
     # convert results to dataframe
     results_df = pd.DataFrame([results])
     return results_df
 
 
-async def calculate_cumulative_statistics_for_all_questions(conn: asyncpg.Connection, source_finder_run_id: int, ranker_id: int):
+async def calculate_cumulative_statistics_for_all_questions(conn: asyncpg.Connection, question_ids, source_finder_run_id: int, ranker_id: int):
     """
     Calculate cumulative statistics across all questions for a specific source finder, run, and ranker.
 
     Args:
+        conn (asyncpg.Connection): Database connection
+        question_ids (list): List of question IDs to analyze
         source_finder_run_id (int): ID of the source finder and run as appears in source runs
         ranker_id (int): ID of the baseline ranker
 
     Returns:
         pd.DataFrame: DataFrame containing aggregated statistics
     """
-    # Get all questions
-    query = "SELECT id FROM questions ORDER BY id"
-    questions = await conn.fetch(query)
-    question_ids = [q["id"] for q in questions]
 
     # Initialize aggregates
     total_baseline_sources = 0
@@ -190,7 +209,7 @@ async def calculate_cumulative_statistics_for_all_questions(conn: asyncpg.Connec
         total_high_ranked_overlap * 100 / max(total_high_ranked_baseline, total_high_ranked_found), 2) \
         if max(total_high_ranked_baseline, total_high_ranked_found) > 0 else 0
 
     # Compile results
     cumulative_stats = {
         "total_questions_analyzed": valid_questions,
         "total_baseline_sources": total_baseline_sources,
@@ -237,7 +256,7 @@ async def get_unified_sources(conn: asyncpg.Connection, question_id: int, source
     baseline_dict = {s["id"]: dict(s) for s in baseline_sources}
     # Get all unique sugya_ids
     all_sugya_ids = set(source_runs_dict.keys()) | set(baseline_dict.keys())
     # Build unified results
     unified_results = []
     for sugya_id in all_sugya_ids:
         in_source_run = sugya_id in source_runs_dict
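A usage sketch of the new auto_commit flag. With auto_commit=False the context manager opens a single asyncpg transaction and commits it only if the block exits without raising; on an exception the connection is closed with the transaction uncommitted, so the statements are discarded by the server. The table and column names below are invented for illustration:

```python
import asyncio

from data_access import get_async_connection

async def save_ranker(name: str) -> None:
    # auto_commit=False wraps the whole block in one transaction;
    # it commits only if the block completes without an exception.
    async with get_async_connection(auto_commit=False) as conn:
        # Hypothetical insert; the real schema's columns may differ.
        await conn.execute("INSERT INTO baseline_rankers (name) VALUES ($1)", name)

asyncio.run(save_ranker("demo-ranker"))
```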
requirements.txt (CHANGED)

@@ -1,5 +1,5 @@
 asyncpg
 gradio
 dotenv
-psycopg2
+psycopg2-binary
 cachetools
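The swap to psycopg2-binary is presumably to avoid compiling against libpq inside the Space's container: psycopg2 builds from source and needs pg_config plus a C toolchain, while psycopg2-binary ships prebuilt wheels.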
tests/test_db_layer.py (CHANGED)

@@ -2,9 +2,16 @@ import pandas as pd
 import pytest
 
 from data_access import calculate_cumulative_statistics_for_all_questions, get_metadata, get_run_ids, \
-    get_async_connection
+    get_async_connection, get_questions
 from data_access import get_unified_sources
 
+@pytest.mark.asyncio
+async def test_get_questions():
+    source_run_id = 2
+    baseline_source_finder_run_id = 1
+    async with get_async_connection() as conn:
+        actual = await get_questions(conn, source_run_id, baseline_source_finder_run_id)
+        assert len(actual) == 10
 
 @pytest.mark.asyncio
 async def test_get_unified_sources():
@@ -13,7 +20,7 @@ async def test_get_unified_sources():
     assert results is not None
     assert stats is not None
 
     # Check number of rows in results list
     assert len(results) > 4, "Results should contain at least one row"
 
     # Check number of rows in stats DataFrame
@@ -30,9 +37,11 @@ async def test_calculate_cumulative_statistics_for_all_questions():
 
     # Call the function to test
     async with get_async_connection() as conn:
-        result = await calculate_cumulative_statistics_for_all_questions(conn, source_finder_run_id, ranker_id)
+        questions = await get_questions(conn, source_finder_run_id, ranker_id)
+        question_ids = [question['id'] for question in questions]
+        result = await calculate_cumulative_statistics_for_all_questions(conn, question_ids, source_finder_run_id, ranker_id)
 
     # Check basic structure of results
     assert isinstance(result, pd.DataFrame), "Result should be a pandas DataFrame"
     assert result.shape[0] == 1, "Result should have one row"
 
@@ -74,7 +83,7 @@ async def test_get_metadata_none_returned():
     async with get_async_connection() as conn:
         result = await get_metadata(conn, question_id, source_finder_run_id)
 
-    assert result ==
+    assert result == {}, "Should return an empty dict when no metadata is found"
 
 @pytest.mark.asyncio
 async def test_get_metadata():