changes to use new table and descriptive runs
- app.py +23 -14
- data_access.py +77 -74
- eval_tables.py +29 -6
- scripts/__init__.py +0 -0
- tests/test_db_layer.py +45 -4
app.py
CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
 import logging
 
 from data_access import get_questions, get_source_finders, get_run_ids, get_baseline_rankers, \
-    get_unified_sources, get_source_text, calculate_cumulative_statistics_for_all_questions
+    get_unified_sources, get_source_text, calculate_cumulative_statistics_for_all_questions, get_metadata
 
 logger = logging.getLogger(__name__)
 
@@ -18,8 +18,9 @@ question_options = []
 baseline_rankers_dict = {}
 baseline_ranker_options = []
 run_ids = []
+available_run_id_dict = {}
 finder_options = []
-previous_run_id =
+previous_run_id = None
 
 run_id_dropdown = None
 
@@ -60,8 +61,9 @@ def update_sources_list(question_option, source_finder_id, run_id: str, baseline
 
 # Main function to handle UI interactions
 async def update_sources_list_async(question_option, source_finder_name, run_id, baseline_ranker_name: str):
+    global available_run_id_dict
     if not question_option:
-        return gr.skip(), gr.skip(), gr.skip(), "No question selected"
+        return gr.skip(), gr.skip(), gr.skip(), "No question selected", ""
     logger.info("processing update")
     if type(baseline_ranker_name) == list:
        baseline_ranker_name = baseline_ranker_name[0]
@@ -75,20 +77,21 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
 
     if question_option == "All questions":
         if finder_id_int and type(run_id) == str:
-
+            run_id_int = available_run_id_dict.get(run_id)
+            all_stats = await calculate_cumulative_statistics_for_all_questions(run_id_int, baseline_ranker_id_int)
         else:
             all_stats = None
-        return None, all_stats, gr.skip(), "Select Run Id and source finder to see results"
+        return None, all_stats, gr.skip(), "Select Run Id and source finder to see results", ""
 
     # Extract question ID from selection
     question_id = questions_dict.get(question_option)
 
-
-    run_id_options =
+    available_run_id_dict = await get_run_ids(question_id, finder_id_int)
+    run_id_options = list(available_run_id_dict.keys())
     if run_id not in run_id_options:
         run_id = run_id_options[0]
 
-    run_id_int =
+    run_id_int = available_run_id_dict.get(run_id)
 
 
 
@@ -96,7 +99,7 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
     stats = None
     # Get source runs data
     if finder_id_int:
-        source_runs, stats = await get_unified_sources(question_id,
+        source_runs, stats = await get_unified_sources(question_id, run_id_int, baseline_ranker_id_int)
     # Create DataFrame for display
     df = pd.DataFrame(source_runs)
 
@@ -110,9 +113,10 @@ async def update_sources_list_async(question_option, source_finder_name, run_id,
 
     # CSV for download
     # csv_data = df.to_csv(index=False)
+    metadata = await get_metadata(question_id, run_id_int)
 
     result_message = f"Found {len(source_runs)} results"
-    return df_display, stats, gr.Dropdown(choices=run_id_options, value=run_id), result_message,
+    return df_display, stats, gr.Dropdown(choices=run_id_options, value=run_id), result_message, metadata
 
 
 # Add a new function to handle row selection
@@ -182,7 +186,6 @@ async def main():
             # Sidebar area
             gr.Markdown("### About")
             gr.Markdown("This tool allows you to explore source runs for Talmudic questions.")
-            gr.Markdown("Start by selecting a question, then optionally filter by source finder and run ID.")
 
             gr.Markdown("### Statistics")
             gr.Markdown(f"Total Questions: {len(questions)}")
@@ -204,6 +207,12 @@ async def main():
                 ],
                 interactive=False,
             )
+            with gr.Row():
+                metadata_text = gr.TextArea(
+                    label="Metadata of Source Finder for Selected Question",
+                    elem_id="metadata",
+                    lines=2
+                )
             with gr.Row():
                 gr.Markdown("# Sources Found")
             with gr.Row():
@@ -240,20 +249,20 @@ async def main():
             update_sources_list,
             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
             # outputs=[run_id_dropdown, results_table, result_text, download_button]
-            outputs=[results_table, statistics_table, run_id_dropdown, result_text]
+            outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
 
         source_finder_dropdown.change(
             update_sources_list,
             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
             # outputs=[run_id_dropdown, results_table, result_text, download_button]
-            outputs=[results_table, statistics_table, run_id_dropdown, result_text]
+            outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
         )
 
         run_id_dropdown.change(
             update_sources_list,
             inputs=[question_dropdown, source_finder_dropdown, run_id_dropdown, baseline_rankers_dropdown],
-            outputs=[results_table, statistics_table, run_id_dropdown, result_text]
+            outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text]
        )
 
 
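The handlers above now expect five values back from update_sources_list: the results table, the statistics table, an updated run-id dropdown, the status message, and the metadata text. A minimal sketch of that wiring, assuming a Gradio Blocks API of the kind app.py uses and placeholder data in place of the real database-backed callback:

import gradio as gr

def update(question):
    # Return one value per output component, in the same order as `outputs=`
    rows = [[1, "Berakhot", "2a"]]                      # placeholder results table
    stats = [[3, 2]]                                    # placeholder statistics
    run_dropdown = gr.Dropdown(choices=["run A", "run B"], value="run A")
    message = f"Found {len(rows)} results"
    metadata = '{"model": "example"}'                   # hypothetical metadata payload
    return rows, stats, run_dropdown, message, metadata

with gr.Blocks() as demo:
    question = gr.Dropdown(choices=["All questions", "Question 1"], label="Question")
    results_table = gr.Dataframe(headers=["id", "tractate", "folio"])
    statistics_table = gr.Dataframe(headers=["overlap_count", "baseline_count"])
    run_id_dropdown = gr.Dropdown(choices=["run A"], label="Run")
    result_text = gr.Textbox(label="Status")
    metadata_text = gr.TextArea(label="Metadata", lines=2)
    # Five outputs, so the callback must return five values
    question.change(update, inputs=[question],
                    outputs=[results_table, statistics_table, run_id_dropdown, result_text, metadata_text])

if __name__ == "__main__":
    demo.launch()

The count and order of returned values must match the outputs= list, which is why every .change() handler in the diff adds metadata_text as a fifth output.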
data_access.py
CHANGED
@@ -35,6 +35,17 @@ async def get_questions():
         questions = await conn.fetch("SELECT id, question_text FROM questions ORDER BY id")
         return [{"id": q["id"], "text": q["question_text"]} for q in questions]
 
+async def get_metadata(question_id: int, source_finder_id_run_id: int):
+    async with get_async_connection() as conn:
+        metadata = await conn.fetchrow('''
+            SELECT metadata
+            FROM source_finder_run_question_metadata sfrqm
+            WHERE sfrqm.question_id = $1 and sfrqm.source_finder_run_id = $2;
+        ''', question_id, source_finder_id_run_id)
+        if metadata is None:
+            return ""
+        return metadata.get('metadata')
+
 
 # Get distinct source finders
 async def get_source_finders():
@@ -44,32 +55,19 @@ async def get_source_finders():
 
 
 # Get distinct run IDs for a question
-async def get_run_ids(question_id: int):
-    async with get_async_connection() as conn:
-        query = "SELECT DISTINCT run_id FROM source_runs WHERE question_id = $1 order by run_id desc"
-        params = [question_id]
-        run_ids = await conn.fetch(query, *params)
-        return [r["run_id"] for r in run_ids]
-
-
-# Get source runs for a specific question with filters
-async def get_source_runs(question_id: int, source_finder_id: Optional[int] = None,
-                          run_id: Optional[int] = None):
+async def get_run_ids(question_id: int, source_finder_id: int):
     async with get_async_connection() as conn:
-        # Build query with filters
         query = """
-
-
-
-
-
+        select distinct sfr.description, srs.source_finder_run_id as run_id
+        from talmudexplore.source_run_results srs
+        join talmudexplore.source_finder_runs sfr on srs.source_finder_run_id = sfr.id
+        join talmudexplore.source_finders sf on sfr.source_finder_id = sf.id
+        where sfr.source_finder_id = $1
+        and srs.question_id = $2
         """
-
-
-        query += " ORDER BY sr.rank DESC"
+        run_ids = await conn.fetch(query, source_finder_id, question_id)
+        return {r["description"]:r["run_id"] for r in run_ids}
 
-        sources = await conn.fetch(query, *params)
-        return [dict(s) for s in sources]
 
 async def get_baseline_rankers():
     async with get_async_connection() as conn:
@@ -110,13 +108,12 @@ async def calculate_baseline_vs_source_stats_for_question(baseline_sources , sou
     return results_df
 
 
-async def calculate_cumulative_statistics_for_all_questions(
+async def calculate_cumulative_statistics_for_all_questions(source_finder_run_id: int, ranker_id: int):
     """
     Calculate cumulative statistics across all questions for a specific source finder, run, and ranker.
 
     Args:
-
-        run_id (int): Run ID to analyze
+        source_finder_run_id (int): ID of the source finder and run as appears in source runs
         ranker_id (int): ID of the baseline ranker
 
     Returns:
@@ -141,7 +138,7 @@ async def calculate_cumulative_statistics_for_all_questions(source_finder_id: in
         for question_id in question_ids:
             try:
                 # Get unified sources for this question
-
+                stats, sources = await get_stats(conn, question_id, ranker_id, source_finder_run_id)
 
                 if sources and len(sources) > 0:
                     valid_questions += 1
@@ -186,62 +183,68 @@ async def calculate_cumulative_statistics_for_all_questions(source_finder_id: in
     return pd.DataFrame([cumulative_stats])
 
 
-async def get_unified_sources(question_id: int,
+async def get_unified_sources(question_id: int, source_finder_run_id: int, ranker_id: int):
     """
     Create unified view of sources from both baseline_sources and source_runs
     with indicators of where each source appears and their respective ranks.
     """
     async with get_async_connection() as conn:
-
-        query_runs = """
-            SELECT tb.tractate_chunk_id as id, sr.rank as source_rank, sr.tractate, sr.folio,
-            sr.reason as source_reason, sr.metadata
-            FROM source_runs sr join talmud_bavli tb on sr.sugya_id = tb.xml_id
-            WHERE sr.question_id = $1 AND sr.source_finder_id = $2 AND sr.run_id = $3
-        """
-        source_runs = await conn.fetch(query_runs, question_id, source_finder_id, run_id)
-
-
-        query_baseline = """
-            SELECT tb.tractate_chunk_id as id, bs.rank as baseline_rank, bs.tractate, bs.folio
-            FROM baseline_sources bs join talmud_bavli tb on bs.sugya_id = tb.xml_id
-            WHERE bs.question_id = $1 AND bs.ranker_id = $2
-        """
-        baseline_sources = await conn.fetch(query_baseline, question_id, ranker_id)
-
-        stats_df = await calculate_baseline_vs_source_stats_for_question(baseline_sources, source_runs)
-
-        # Convert to dictionaries for easier lookup
-        source_runs_dict = {s["id"]: dict(s) for s in source_runs}
-        baseline_dict = {s["id"]: dict(s) for s in baseline_sources}
-
-        # Get all unique sugya_ids
-        all_sugya_ids = set(source_runs_dict.keys()) | set(baseline_dict.keys())
-
-        # Build unified results
-        unified_results = []
-        for sugya_id in all_sugya_ids:
-            in_source_run = sugya_id in source_runs_dict
-            in_baseline = sugya_id in baseline_dict
-            if in_baseline:
-                info = baseline_dict[sugya_id]
-            else:
-                info = source_runs_dict[sugya_id]
-            result = {
-                "id": sugya_id,
-                "tractate": info.get("tractate"),
-                "folio": info.get("folio"),
-                "in_baseline": "Yes" if in_baseline else "No",
-                "baseline_rank": baseline_dict.get(sugya_id, {}).get("baseline_rank", "N/A"),
-                "in_source_run": "Yes" if in_source_run else "No",
-                "source_run_rank": source_runs_dict.get(sugya_id, {}).get("source_rank", "N/A"),
-                "source_reason": source_runs_dict.get(sugya_id, {}).get("reason", "N/A"),
-                "metadata": source_runs_dict.get(sugya_id, {}).get("metadata", "")
-            }
-            unified_results.append(result)
-
-
-
+        stats_df, unified_results = await get_stats(conn, question_id, ranker_id, source_finder_run_id)
+
+    return unified_results, stats_df
+
+
+async def get_stats(conn, question_id, ranker_id, source_finder_run_id):
+    # Get sources from source_runs
+    query_runs = """
+        SELECT tb.tractate_chunk_id as id,
+               sr.rank as source_rank,
+               sr.tractate,
+               sr.folio,
+               sr.reason as source_reason
+        FROM source_run_results sr
+        join talmud_bavli tb on sr.sugya_id = tb.xml_id
+        WHERE sr.question_id = $1
+        AND sr.source_finder_run_id = $2
+    """
+    source_runs = await conn.fetch(query_runs, question_id, source_finder_run_id)
+    # Get sources from baseline_sources
+    query_baseline = """
+        SELECT tb.tractate_chunk_id as id, bs.rank as baseline_rank, bs.tractate, bs.folio
+        FROM baseline_sources bs
+        join talmud_bavli tb on bs.sugya_id = tb.xml_id
+        WHERE bs.question_id = $1
+        AND bs.ranker_id = $2
+    """
+    baseline_sources = await conn.fetch(query_baseline, question_id, ranker_id)
+    stats_df = await calculate_baseline_vs_source_stats_for_question(baseline_sources, source_runs)
+    # Convert to dictionaries for easier lookup
+    source_runs_dict = {s["id"]: dict(s) for s in source_runs}
+    baseline_dict = {s["id"]: dict(s) for s in baseline_sources}
+    # Get all unique sugya_ids
+    all_sugya_ids = set(source_runs_dict.keys()) | set(baseline_dict.keys())
+    # Build unified results
+    unified_results = []
+    for sugya_id in all_sugya_ids:
+        in_source_run = sugya_id in source_runs_dict
+        in_baseline = sugya_id in baseline_dict
+        if in_baseline:
+            info = baseline_dict[sugya_id]
+        else:
+            info = source_runs_dict[sugya_id]
+        result = {
+            "id": sugya_id,
+            "tractate": info.get("tractate"),
+            "folio": info.get("folio"),
+            "in_baseline": "Yes" if in_baseline else "No",
+            "baseline_rank": baseline_dict.get(sugya_id, {}).get("baseline_rank", "N/A"),
+            "in_source_run": "Yes" if in_source_run else "No",
+            "source_run_rank": source_runs_dict.get(sugya_id, {}).get("source_rank", "N/A"),
+            "source_reason": source_runs_dict.get(sugya_id, {}).get("reason", "N/A"),
+            "metadata": source_runs_dict.get(sugya_id, {}).get("metadata", "")
+        }
+        unified_results.append(result)
+    return stats_df, unified_results
 
 
 async def get_source_text(tractate_chunk_id: int):
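get_run_ids now returns a dict mapping each run's description to its source_finder_run_id, and that id is what get_unified_sources and get_metadata take. A hedged usage sketch, assuming a configured database behind get_async_connection and this module's functions as committed; the ids passed in are placeholders:

import asyncio

from data_access import get_metadata, get_run_ids, get_unified_sources

async def show_runs(question_id: int, source_finder_id: int, ranker_id: int):
    # description -> source_finder_run_id, e.g. {"descriptive run name": 4}
    runs = await get_run_ids(question_id, source_finder_id)
    for description, source_finder_run_id in runs.items():
        sources, stats = await get_unified_sources(question_id, source_finder_run_id, ranker_id)
        metadata = await get_metadata(question_id, source_finder_run_id)  # "" when no row exists
        print(description, len(sources), metadata)

if __name__ == "__main__":
    asyncio.run(show_runs(question_id=1, source_finder_id=1, ranker_id=1))

Because get_metadata returns an empty string rather than None when no metadata row exists, the UI can bind its result directly to a text component.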
eval_tables.py
CHANGED
@@ -51,12 +51,35 @@ def create_eval_database():
         );
     ''')
 
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS source_finder_runs (
+            id SERIAL PRIMARY KEY,
+            run_id INTEGER NOT NULL,
+            source_finder_id INTEGER NOT NULL,
+            description TEXT,
+            FOREIGN KEY (source_finder_id) REFERENCES source_finders(id),
+            CONSTRAINT unique_source_per_run_id UNIQUE(run_id, source_finder_id)
+        );
+    ''')
+
+    cursor.execute('''
+        CREATE TABLE IF NOT EXISTS source_finder_run_question_metadata (
+            id SERIAL PRIMARY KEY,
+            question_id INTEGER NOT NULL,
+            source_finder_run_id INTEGER NOT NULL,
+            metadata JSON,
+            FOREIGN KEY (source_finder_run_id) REFERENCES source_finder_runs(id),
+            FOREIGN KEY (question_id) REFERENCES questions(id),
+            CONSTRAINT unique_question_per_run_id UNIQUE(question_id, source_finder_run_id)
+        );
+    ''')
+
+
     # Create table for logging all sources from each run
     cursor.execute('''
-        CREATE TABLE IF NOT EXISTS
+        CREATE TABLE IF NOT EXISTS source_run_results (
             id SERIAL PRIMARY KEY,
-
-            run_id TEXT NOT NULL,
+            source_finder_run_id INTEGER NOT NULL,
             run_timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
             question_id INTEGER NOT NULL,
             tractate TEXT NOT NULL,
@@ -64,7 +87,7 @@ def create_eval_database():
             sugya_id TEXT NOT NULL,
             rank INTEGER NOT NULL,
             reason TEXT,
-            FOREIGN KEY (
+            FOREIGN KEY (source_finder_run_id) REFERENCES source_finder_runs(id),
             FOREIGN KEY (question_id) REFERENCES questions(id)
         );
     ''')
@@ -99,8 +122,8 @@ def load_baseline_sources():
 
 if __name__ == '__main__':
     # Create the database
-
-    load_baseline_sources()
+    create_eval_database()
+    # load_baseline_sources()
 
 
 
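source_finder_runs now holds one row per (run_id, source_finder_id) pair, and both source_run_results and source_finder_run_question_metadata reference its surrogate key instead of carrying run_id and source_finder_id themselves. A sketch of how a run could be recorded under this schema, assuming a psycopg2-style connection like the one create_eval_database() uses; the helper name and values are illustrative, not part of the commit:

def record_run(conn, run_id: int, source_finder_id: int, description: str,
               question_id: int, sources: list[dict]):
    cur = conn.cursor()
    # One row per (run_id, source_finder_id); the UNIQUE constraint guards duplicates
    cur.execute(
        """
        INSERT INTO source_finder_runs (run_id, source_finder_id, description)
        VALUES (%s, %s, %s)
        RETURNING id
        """,
        (run_id, source_finder_id, description),
    )
    source_finder_run_id = cur.fetchone()[0]
    # Results reference the surrogate key instead of carrying run_id/source_finder_id
    for s in sources:
        cur.execute(
            """
            INSERT INTO source_run_results
                (source_finder_run_id, question_id, tractate, folio, sugya_id, rank, reason)
            VALUES (%s, %s, %s, %s, %s, %s, %s)
            """,
            (source_finder_run_id, question_id, s["tractate"], s["folio"],
             s["sugya_id"], s["rank"], s.get("reason")),
        )
    conn.commit()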
scripts/__init__.py
ADDED
File without changes
tests/test_db_layer.py
CHANGED
@@ -1,7 +1,7 @@
 import pandas as pd
 import pytest
 
-from data_access import calculate_cumulative_statistics_for_all_questions
+from data_access import calculate_cumulative_statistics_for_all_questions, get_metadata, get_run_ids
 from data_access import get_unified_sources
 
 
@@ -20,9 +20,6 @@ async def test_get_unified_sources():
     # You can also check specific stats columns
     assert "overlap_count" in stats.columns, "Stats should contain overlap_count"
 
-
-
-
 @pytest.mark.asyncio
 async def test_calculate_cumulative_statistics_for_all_questions():
     # Test with known source_finder_id, run_id, and ranker_id
@@ -65,3 +62,47 @@ async def test_calculate_cumulative_statistics_for_all_questions():
     assert 0 <= result["overall_high_ranked_overlap_percentage"].iloc[
         0] <= 100, "High ranked overlap percentage should be between 0 and 100"
 
+@pytest.mark.asyncio
+async def test_get_metadata_none_returned():
+    # Test with known source_finder_id, run_id, and ranker_id
+    source_finder_id = 1
+    run_id = 1
+    question_id = 1
+
+    # Call the function to test
+    result = await get_metadata(question_id, source_finder_id, run_id)
+
+    assert result == "", "Should return empty string when no metadata is found"
+
+@pytest.mark.asyncio
+async def test_get_metadata():
+    # Test with known source_finder_id, run_id, and ranker_id
+    source_finder_run_id = 4
+    question_id = 1
+
+    # Call the function to test
+    result = await get_metadata(question_id, source_finder_run_id)
+
+    assert result is not None, "Should return metadata when it exists"
+
+
+@pytest.mark.asyncio
+async def test_get_run_ids():
+    # Test with known question_id and source_finder_id
+    question_id = 2  # Using a question ID that exists in the test database
+    source_finder_id = 2  # Using a source finder ID that exists in the test database
+
+    # Call the function to test
+    result = await get_run_ids(question_id, source_finder_id)
+
+    # Verify the result is a dictionary
+    assert isinstance(result, dict), "Result should be a dictionary"
+
+    # Check that the dictionary is not empty (assuming there are run IDs for this question/source finder)
+    assert len(result) > 0, "Should return at least one run ID"
+
+    # Test with a non-existent question_id
+    non_existent_question_id = 9999
+    empty_result = await get_run_ids(non_existent_question_id, source_finder_id)
+    assert isinstance(empty_result, dict), "Should return an empty dictionary for non-existent question"
+    assert len(empty_result) == 0, "Should return empty dictionary for non-existent question"