DeanGumas committed
Commit 23a14a5 · 1 Parent(s): 4022db3

Updated the compare_result function to allow passing in the cursor; also re-ran test_pretrained and test_rag with the updated loss function.

src/evaluation/__pycache__/compare_result.cpython-312.pyc CHANGED
Binary files a/src/evaluation/__pycache__/compare_result.cpython-312.pyc and b/src/evaluation/__pycache__/compare_result.cpython-312.pyc differ
 
src/evaluation/compare_result.py CHANGED
@@ -1,11 +1,6 @@
  import math
- import sqlite3 as sql
-
- def compare_result(sample_query, sample_result, query_output):
-     # Create connection to sqlite3 database
-     connection = sql.connect('./nba-data/nba.sqlite')
-     cursor = connection.cursor()
 
+ def compare_result(cursor, sample_query, sample_result, query_output):
      # Clean model output to only have the query output
      if query_output[0:8] == "SQLite:\n":
          query = query_output[8:]
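
With this change the caller owns the SQLite connection instead of compare_result opening a new one on every call. A minimal usage sketch under that assumption (the database path comes from the removed code above; the sample inputs are illustrative, borrowed from a dataset example shown in the notebook output below; the three return values match the unpacking used in test_rag.ipynb):

import sqlite3 as sql

from src.evaluation.compare_result import compare_result

# Open the connection once and reuse the cursor for every comparison,
# rather than reconnecting inside compare_result on each call.
connection = sql.connect('./nba-data/nba.sqlite')
cursor = connection.cursor()

# Illustrative inputs (taken from one of the evaluation examples).
sample_query = "SELECT MAX(pts_away) FROM game WHERE team_abbreviation_away = 'PHX';"
sample_result = "161.0"
query_output = "SQLite:\n" + sample_query

valid, sql_matched, result_matched = compare_result(cursor, sample_query, sample_result, query_output)
print(valid, sql_matched, result_matched)

connection.close()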
src/prompts/__pycache__/prompt.cpython-312.pyc ADDED
Binary file (9.22 kB).
 
test_pretrained.ipynb CHANGED
@@ -9,7 +9,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 31,
+ "execution_count": 1,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -26,7 +26,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 32,
+ "execution_count": 2,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -35,7 +35,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -56,7 +56,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 34,
+ "execution_count": 4,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -73,7 +73,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
  "metadata": {},
  "outputs": [
  {
@@ -83,9 +83,9 @@
  "Total dataset examples: 1044\n",
  "\n",
  "\n",
- "How many points did the Phoenix Suns score in the highest scoring away game they played?\n",
- "SELECT MAX(pts_away) FROM game WHERE team_abbreviation_away = 'PHX';\n",
- "161.0\n"
+ "How many times were games tied when the Indiana Pacers played at home?\n",
+ "SELECT SUM(times_tied) as total_times_tied FROM other_stats WHERE team_abbreviation_home = 'IND';\n",
+ "4805.0\n"
  ]
  }
  ],
@@ -111,16 +111,20 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
  "metadata": {},
  "outputs": [],
  "source": [
  "# Set device to cuda if available, otherwise CPU\n",
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
  "\n",
  "# Load model and tokenizer\n",
- "tokenizer = AutoTokenizer.from_pretrained(get_path(\"deepseek-coder-1.3b-instruct\"))\n",
- "model = AutoModelForCausalLM.from_pretrained(get_path(\"deepseek-coder-1.3b-instruct\"), torch_dtype=torch.bfloat16, device_map=device) \n",
+ "if is_google_colab:\n",
+ "    tokenizer = AutoTokenizer.from_pretrained(get_path(\"deepseek-coder-1.3b-instruct\"))\n",
+ "    model = AutoModelForCausalLM.from_pretrained(get_path(\"deepseek-coder-1.3b-instruct\"), torch_dtype=torch.bfloat16, device_map=device) \n",
+ "else:\n",
+ "    tokenizer = AutoTokenizer.from_pretrained(\"./deepseek-coder-1.3b-instruct\")\n",
+ "    model = AutoModelForCausalLM.from_pretrained(\"./deepseek-coder-1.3b-instruct\", torch_dtype=torch.bfloat16, device_map=device) \n",
  "model.generation_config.pad_token_id = tokenizer.pad_token_id"
  ]
  },
@@ -133,7 +137,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 28,
+ "execution_count": 7,
  "metadata": {},
  "outputs": [
  {
@@ -141,7 +145,7 @@
  "output_type": "stream",
  "text": [
  "SQLite:\n",
- "SELECT team_abbreviation_home FROM other_stats WHERE lead_changes = 1 AND season_id = '2001';\n",
+ "SELECT COUNT(*) FROM game WHERE team_name_home = 'Indiana Pacers' AND wl_home = 'T';\n",
  "\n"
  ]
  }
@@ -166,14 +170,15 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
  "metadata": {},
  "outputs": [
  {
  "name": "stdout",
  "output_type": "stream",
  "text": [
- "cleaned\n"
+ "cleaned\n",
+ "(0,)\n"
  ]
  }
  ],
@@ -209,18 +214,22 @@
  },
  {
  "cell_type": "code",
- "execution_count": 12,
+ "execution_count": 9,
  "metadata": {},
  "outputs": [
  {
- "ename": "ImportError",
- "evalue": "cannot import name 'compare_result_two' from 'src.evaluation.compare_result' (/Users/esteban/Documents/USC/spring_2025/NLP/SQL-Generation/src/evaluation/compare_result.py)",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[30], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmath\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mevaluation\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcompare_result\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m compare_result_two\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcompare_result\u001b[39m(sample_query, sample_result, query_output):\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Clean model output to only have the query output\u001b[39;00m\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m query_output[\u001b[38;5;241m0\u001b[39m:\u001b[38;5;241m7\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSQLite:\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
- "\u001b[0;31mImportError\u001b[0m: cannot import name 'compare_result_two' from 'src.evaluation.compare_result' (/Users/esteban/Documents/USC/spring_2025/NLP/SQL-Generation/src/evaluation/compare_result.py)"
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "What is the year the Milwaukee team was founded?\n",
+ "SELECT year_founded FROM team WHERE city = 'Milwaukee';\n",
+ "1968.0\n",
+ "SQLite:\n",
+ "SELECT year_founded FROM team WHERE full_name = 'Milwaukee Bucks';\n",
+ "\n",
+ "Statement valid? True\n",
+ "SQLite matched? False\n",
+ "Result matched? True\n"
  ]
  }
  ],
@@ -256,7 +265,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -304,7 +313,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
  "metadata": {},
  "outputs": [
  {
@@ -317,9 +326,9 @@
  "Completed 200\n",
  "\n",
  "Less than 90 results:\n",
- "Percent valid: 0.8448979591836735\n",
- "Percent SQLite matched: 0.43673469387755104\n",
- "Percent result matched: 0.6530612244897959\n",
+ "Percent valid: 0.8734693877551021\n",
+ "Percent SQLite matched: 0.4448979591836735\n",
+ "Percent result matched: 0.6979591836734694\n",
  "Dataset length: 245\n"
  ]
  }
@@ -341,36 +350,7 @@
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Completed 50\n",
- "Completed 100\n",
- "Completed 150\n",
- "Completed 200\n",
- "Completed 250\n",
- "Completed 300\n",
- "Completed 350\n",
- "Completed 400\n",
- "Completed 450\n",
- "Completed 500\n",
- "Completed 550\n",
- "Completed 600\n",
- "Completed 650\n",
- "Completed 700\n",
- "Completed 750\n",
- "Completed 800\n",
- "\n",
- "Queries from game results:\n",
- "Percent valid: 0.7613365155131265\n",
- "Percent SQLite matched: 0.13842482100238662\n",
- "Percent result matched: 0.383054892601432\n",
- "Dataset length: 838\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "game_queries = pd.read_csv(get_path(\"train-data/queries_from_game.tsv\"), sep='\\t')\n",
  "run_evaluation(game_queries, \"Queries from game\")\n",
@@ -388,23 +368,7 @@
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Completed 50\n",
- "Completed 100\n",
- "Completed 150\n",
- "\n",
- "Queries from other stats results:\n",
- "Percent valid: 0.21428571428571427\n",
- "Percent SQLite matched: 0.01948051948051948\n",
- "Percent result matched: 0.07142857142857142\n",
- "Dataset length: 154\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "other_stats_queries = pd.read_csv(get_path(\"train-data/queries_from_other_stats.tsv\"), sep='\\t')\n",
  "run_evaluation(other_stats_queries, \"Queries from other stats\")\n",
@@ -422,21 +386,7 @@
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Completed 50\n",
- "\n",
- "Queries from team results:\n",
- "Percent valid: 0.8653846153846154\n",
- "Percent SQLite matched: 0.5961538461538461\n",
- "Percent result matched: 0.7884615384615384\n",
- "Dataset length: 52\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "team_queries = pd.read_csv(get_path(\"train-data/queries_from_team.tsv\"), sep='\\t')\n",
  "run_evaluation(team_queries, \"Queries from team\")\n",
@@ -454,23 +404,7 @@
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Completed 50\n",
- "Completed 100\n",
- "Completed 150\n",
- "\n",
- "Queries with join results:\n",
- "Percent valid: 0.1945945945945946\n",
- "Percent SQLite matched: 0.0\n",
- "Percent result matched: 0.04864864864864865\n",
- "Dataset length: 185\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "join_queries = pd.read_csv(get_path(\"train-data/with_join.tsv\"), sep='\\t')\n",
  "run_evaluation(join_queries, \"Queries with join\")\n",
@@ -488,37 +422,7 @@
  "cell_type": "code",
  "execution_count": null,
  "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Completed 50\n",
- "Completed 100\n",
- "Completed 150\n",
- "Completed 200\n",
- "Completed 250\n",
- "Completed 300\n",
- "Completed 350\n",
- "Completed 400\n",
- "Completed 450\n",
- "Completed 500\n",
- "Completed 550\n",
- "Completed 600\n",
- "Completed 650\n",
- "Completed 700\n",
- "Completed 750\n",
- "Completed 800\n",
- "Completed 850\n",
- "\n",
- "Queries without join results:\n",
- "Percent valid: 0.7916181606519208\n",
- "Percent SQLite matched: 0.17462165308498254\n",
- "Percent result matched: 0.42374854481955765\n",
- "Dataset length: 859\n"
- ]
- }
- ],
+ "outputs": [],
  "source": [
  "no_join_queries = pd.read_csv(get_path(\"train-data/without_join.tsv\"), sep='\\t')\n",
  "run_evaluation(no_join_queries, \"Queries without join\")\n",
@@ -534,7 +438,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 15,
+ "execution_count": 12,
  "metadata": {},
  "outputs": [
  {
@@ -563,9 +467,9 @@
  "Completed 1000\n",
  "\n",
  "All training data results:\n",
- "Percent valid: 0.685823754789272\n",
+ "Percent valid: 0.7097701149425287\n",
  "Percent SQLite matched: 0.14367816091954022\n",
- "Percent result matched: 0.35823754789272033\n",
+ "Percent result matched: 0.3668582375478927\n",
  "Dataset length: 1044\n"
  ]
  }
@@ -579,7 +483,7 @@
  ],
  "metadata": {
  "kernelspec": {
- "display_name": "CSCI544",
+ "display_name": "Python 3",
  "language": "python",
  "name": "python3"
  },
@@ -593,7 +497,7 @@
  "name": "python",
  "nbconvert_exporter": "python",
  "pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.12.6"
  }
  },
  "nbformat": 4,
test_rag.ipynb CHANGED
@@ -375,7 +375,7 @@
  " actual_result = \"Error executing query: \" + str(e)\n",
  " \n",
  " # Compare the ground truth query and expected result to the generated query and actual result.\n",
- " valid, sql_matched, result_matched = compare_result(row[\"sql_query\"], row[\"result\"], generated_query)\n",
+ " valid, sql_matched, result_matched = compare_result(cursor, row[\"sql_query\"], row[\"result\"], generated_query)\n",
  " print(\"=============================================\")\n",
  " print(f\"Overall Valid: {valid}\")\n",
  " print(f\"SQL Query Matched: {sql_matched}\")\n",