Spaces:

davidr70
/

eval_results

Running

App Files Files Community

davidr70 committed on Apr 29

Commit

83afd54

1 Parent(s): 6e35819

improvements

Browse files

Files changed (2) hide show

app.py +22 -19
data_access.py +9 -36

app.py CHANGED Viewed

@@ -2,8 +2,9 @@ import asyncio
 import gradio as gr
 import pandas as pd
-from data_access import get_pool, get_async_connection, close_pool, get_questions, get_source_finders, get_run_ids, \
-    get_source_runs, get_baseline_rankers, calculate_baseline_vs_source_stats_for_question, get_unified_sources
 # Initialize data at the module level
 questions = []
@@ -43,16 +44,19 @@ async def initialize_data():
     baseline_ranker_labels = {str(f["id"]): f["name"] for f in source_finders}
 # Main function to handle UI interactions
-def update_sources_list(question_option, source_finder_id, baseline_ranker_id: str, run_id:str):
     if not question_option:
         return None, [], "No question selected", None
     # Extract question ID from selection
     question_id = int(question_option.split(":")[0])
-    # Get run_ids for filtering - use asyncio.run for each independent operation
-    available_run_ids = asyncio.run(get_run_ids(question_id))
     run_id_options = [str(r_id) for r_id in available_run_ids]
     if run_id not in run_id_options:
         run_id = run_id_options[0]
@@ -67,16 +71,16 @@ def update_sources_list(question_option, source_finder_id, baseline_ranker_id: s
     stats = None
     # Get source runs data
     if finder_id_int:
-        source_runs, stats = asyncio.run(get_unified_sources(question_id, finder_id_int, run_id_int, baseline_ranker_id_int))
         # Create DataFrame for display
         df = pd.DataFrame(source_runs)
     if not source_runs:
         return None, None, run_id_options, "No results found for the selected filters",
     # Format table columns
-    columns_to_display = ['sugya_id', 'in_baseline', 'baseline_rank', 'in_source_run', 'source_run_rank', 'tractate', 'folio', 'reason']
     df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df
     # CSV for download
@@ -90,7 +94,6 @@ def update_sources_list(question_option, source_finder_id, baseline_ranker_id: s
 # Ensure we clean up when done
 async def main():
-    await get_pool()
     await initialize_data()
     with gr.Blocks(title="Source Runs Explorer") as app:
         gr.Markdown("# Source Runs Explorer")
@@ -129,17 +132,16 @@ async def main():
                             interactive=True
                         )
                 result_text = gr.Markdown("Select a question to view source runs")
                 gr.Markdown("# Source Run Statistics")
                 statistics_table = gr.DataFrame(
                     headers=["num_high_ranked_baseline_sources",
-                            "num_high_ranked_found_sources",
-                            "overlap_count",
-                            "overlap_percentage",
-                            "high_ranked_overlap_count",
-                            "high_ranked_overlap_percentage"
-                    ],
                     interactive=False,
                 )
                 gr.Markdown("# Sources Found")
@@ -187,9 +189,9 @@ async def main():
         )
         run_id_dropdown.change(
-             update_sources_list,
-             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
-             outputs=[results_table, statistics_table, run_id_dropdown, result_text]
         )
         # Initial load of data when question is selected
@@ -202,5 +204,6 @@ async def main():
     app.queue()
     app.launch()
 if __name__ == "__main__":
     asyncio.run(main())

 import gradio as gr
 import pandas as pd
+from data_access import get_questions, get_source_finders, get_run_ids, get_baseline_rankers, \
+    get_unified_sources
 # Initialize data at the module level
 questions = []
     baseline_ranker_labels = {str(f["id"]): f["name"] for f in source_finders}
+def update_sources_list(question_option, source_finder_id, baseline_ranker_id: str, run_id: str):
+    return asyncio.run(update_sources_list_async(question_option, source_finder_id, baseline_ranker_id, run_id))
 # Main function to handle UI interactions
+async def update_sources_list_async(question_option, source_finder_id, baseline_ranker_id: str, run_id: str):
     if not question_option:
         return None, [], "No question selected", None
     # Extract question ID from selection
     question_id = int(question_option.split(":")[0])
+    available_run_ids = await get_run_ids(question_id)
     run_id_options = [str(r_id) for r_id in available_run_ids]
     if run_id not in run_id_options:
         run_id = run_id_options[0]
     stats = None
     # Get source runs data
     if finder_id_int:
+        source_runs, stats = await get_unified_sources(question_id, finder_id_int, run_id_int, baseline_ranker_id_int)
         # Create DataFrame for display
         df = pd.DataFrame(source_runs)
     if not source_runs:
         return None, None, run_id_options, "No results found for the selected filters",
     # Format table columns
+    columns_to_display = ['sugya_id', 'in_baseline', 'baseline_rank', 'in_source_run', 'source_run_rank', 'tractate',
+                          'folio', 'reason']
     df_display = df[columns_to_display] if all(col in df.columns for col in columns_to_display) else df
     # CSV for download
 # Ensure we clean up when done
 async def main():
     await initialize_data()
     with gr.Blocks(title="Source Runs Explorer") as app:
         gr.Markdown("# Source Runs Explorer")
                             interactive=True
                         )
                 result_text = gr.Markdown("Select a question to view source runs")
                 gr.Markdown("# Source Run Statistics")
                 statistics_table = gr.DataFrame(
                     headers=["num_high_ranked_baseline_sources",
+                             "num_high_ranked_found_sources",
+                             "overlap_count",
+                             "overlap_percentage",
+                             "high_ranked_overlap_count",
+                             "high_ranked_overlap_percentage"
+                             ],
                     interactive=False,
                 )
                 gr.Markdown("# Sources Found")
         )
         run_id_dropdown.change(
+            update_sources_list,
+            inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
+            outputs=[results_table, statistics_table, run_id_dropdown, result_text]
         )
         # Initial load of data when question is selected
     app.queue()
     app.launch()
 if __name__ == "__main__":
     asyncio.run(main())

data_access.py CHANGED Viewed

@@ -9,52 +9,25 @@ from dotenv import load_dotenv
 import pandas as pd
 # Global connection pool
-_pool = None
 load_dotenv()
-async def get_pool(schema="talmudexplore", min_size=2, max_size=5):
-    """Initialize and return the connection pool with the specified schema."""
-    global _pool
-    if _pool is not None:
-        current_loop = asyncio.get_running_loop()
-        if getattr(_pool, '_loop', None) != current_loop:
-            try:
-                await _pool.close()
-            except:
-                pass
-            _pool = None
-    if _pool is None:
-        _pool = await asyncpg.create_pool(
             database=os.getenv("pg_dbname"),
             user=os.getenv("pg_user"),
             password=os.getenv("pg_password"),
             host=os.getenv("pg_host"),
-            port=os.getenv("pg_port"),
-            min_size=min_size,
-            max_size=max_size,
-            setup=lambda conn: conn.execute(f'SET search_path TO {schema}')
         )
-    return _pool
-@asynccontextmanager
-async def get_async_connection():
-    """Get a connection from the pool as an async context manager."""
-    pool = await get_pool()
-    conn = await pool.acquire()
-    try:
         yield conn
     finally:
-        await pool.release(conn)
-async def close_pool():
-    """Close the connection pool."""
-    global _pool
-    if _pool:
-        await _pool.close()
-        _pool = None
 async def get_questions():

 import pandas as pd
 # Global connection pool
 load_dotenv()
+@asynccontextmanager
+async def get_async_connection(schema="talmudexplore"):
+    """Get a connection for the current request."""
+    try:
+        # Create a single connection without relying on a shared pool
+        conn = await asyncpg.connect(
             database=os.getenv("pg_dbname"),
             user=os.getenv("pg_user"),
             password=os.getenv("pg_password"),
             host=os.getenv("pg_host"),
+            port=os.getenv("pg_port")
         )
+        await conn.execute(f'SET search_path TO {schema}')
         yield conn
     finally:
+        await conn.close()
 async def get_questions():