USC-Applied-NLP-Group
/

SQL-Generation

TensorBoard

Safetensors

Model card Files Files and versions Metrics Training metrics Community

DeanGumas commited on Apr 3

Commit

8c47142

1 Parent(s): 32c1934

Initial attempt at fine-tuning using LoRA with basic cross-entropy loss

Browse files

Files changed (1) hide show

finetune_model.ipynb +524 -0

finetune_model.ipynb ADDED Viewed

	@@ -0,0 +1,524 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Finetune DeepSeek Coder 1.3B for NBA Kaggle Database SQLite Generation"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## First load data and convert to Dataset object tokenized by the DeepSeek model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
+      "\n",
+      "Total dataset examples: 1044\n",
+      "                                       natural_query  \\\n",
+      "0  Which NBA teams were established after the yea...   \n",
+      "1  What is the most points the Los Angeles Lakers...   \n",
+      "2  What is the second-highest number of points th...   \n",
+      "3  How many home games did the Golden State Warri...   \n",
+      "4  What is the average number of assists by the B...   \n",
+      "\n",
+      "                                           sql_query                result  \n",
+      "0  SELECT full_name FROM team WHERE year_founded ...  New Orleans Pelicans  \n",
+      "1  SELECT MAX(pts_home) FROM game  WHERE team_nam...                   162  \n",
+      "2  SELECT pts_home FROM game WHERE team_name_home...                   156  \n",
+      "3  SELECT COUNT(*)  FROM game  WHERE team_abbrevi...                    29  \n",
+      "4  SELECT AVG(ast_home)  FROM game  WHERE team_ab...           26.51355662  \n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Map: 100%|██████████| 1044/1044 [00:00<00:00, 4433.07 examples/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "939\n",
+      "105\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import torch\n",
+    "from datasets import Dataset\n",
+    "from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, BitsAndBytesConfig\n",
+    "from torch.utils.data import DataLoader\n",
+    "from peft import LoraConfig, get_peft_model, TaskType\n",
+    "import os\n",
+    "\n",
+    "# Read the tab-separated training examples into a DataFrame\n",
+    "df = pd.read_table(\"./train-data/sql_train.tsv\")\n",
+    "\n",
+    "# Quick sanity check: row count plus the first few rows\n",
+    "print(f\"Total dataset examples: {len(df)}\")\n",
+    "print(df.head())\n",
+    "\n",
+    "# The tokenizer is read from the local checkpoint directory;\n",
+    "# model_name is reused when the model weights are loaded\n",
+    "model_name = \"./deepseek-coder-1.3b-instruct\"\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
+    "\n",
+    "# Preprocessing function\n",
+    "def preprocess_function(examples, max_length=256):\n",
+    "    \"\"\"\n",
+    "    Build causal-LM training examples from natural language / SQL pairs.\n",
+    "\n",
+    "    Each pair becomes ONE token sequence (prompt, newline, SQL, EOS) so that\n",
+    "    input_ids and labels line up position by position. Prompt and padding\n",
+    "    positions are labelled -100 so the loss only covers the SQL tokens.\n",
+    "\n",
+    "    Args:\n",
+    "        examples: Batch dict with \"natural_query\" and \"sql_query\" lists.\n",
+    "        max_length: Fixed length each sequence is truncated / padded to.\n",
+    "\n",
+    "    Returns:\n",
+    "        Dict with \"input_ids\", \"attention_mask\" and \"labels\" lists,\n",
+    "        every entry exactly max_length long.\n",
+    "    \"\"\"\n",
+    "    ignore_index = -100  # label value skipped by the cross-entropy loss\n",
+    "    model_inputs = {\"input_ids\": [], \"attention_mask\": [], \"labels\": []}\n",
+    "\n",
+    "    for question, sql in zip(examples[\"natural_query\"], examples[\"sql_query\"]):\n",
+    "        prompt_ids = tokenizer(\"Translate to SQL: \" + question + \"\\n\")[\"input_ids\"]\n",
+    "        # The SQL continues the prompt, so no extra special tokens are added;\n",
+    "        # the explicit EOS teaches the model where the query ends\n",
+    "        sql_ids = tokenizer(sql, add_special_tokens=False)[\"input_ids\"] + [tokenizer.eos_token_id]\n",
+    "\n",
+    "        input_ids = (prompt_ids + sql_ids)[:max_length]\n",
+    "        labels = ([ignore_index] * len(prompt_ids) + sql_ids)[:max_length]\n",
+    "\n",
+    "        # Right-pad to a fixed length; padded positions are masked out of\n",
+    "        # both attention and the loss\n",
+    "        pad_len = max_length - len(input_ids)\n",
+    "        model_inputs[\"input_ids\"].append(input_ids + [tokenizer.pad_token_id] * pad_len)\n",
+    "        model_inputs[\"attention_mask\"].append([1] * len(input_ids) + [0] * pad_len)\n",
+    "        model_inputs[\"labels\"].append(labels + [ignore_index] * pad_len)\n",
+    "\n",
+    "    return model_inputs\n",
+    "\n",
+    "# Convert to Hugging Face Dataset\n",
+    "dataset = Dataset.from_pandas(df)\n",
+    "\n",
+    "# Apply tokenization\n",
+    "tokenized_dataset = dataset.map(preprocess_function, batched=True)\n",
+    "\n",
+    "# Shuffle with a fixed seed before splitting so the validation set is a\n",
+    "# reproducible random sample rather than simply the last rows of the file\n",
+    "shuffled_dataset = tokenized_dataset.shuffle(seed=42)\n",
+    "\n",
+    "# Split into train/validation\n",
+    "split = int(0.9 * len(shuffled_dataset))  # 90% train, 10% validation\n",
+    "train_dataset = shuffled_dataset.select(range(split))\n",
+    "val_dataset = shuffled_dataset.select(range(split, len(shuffled_dataset)))\n",
+    "\n",
+    "print(len(train_dataset))\n",
+    "print(len(val_dataset))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load model and define training arguments"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "trainable params: 6,291,456 || all params: 1,352,763,392 || trainable%: 0.4651\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Enable 8-bit quantization for lower memory usage.\n",
+    "# (BitsAndBytesConfig only has a compute-dtype option for the 4-bit path,\n",
+    "# so no compute dtype is passed here.)\n",
+    "bnb_config = BitsAndBytesConfig(load_in_8bit=True)\n",
+    "\n",
+    "# Load model with quantization; device_map places the weights at load time\n",
+    "device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'\n",
+    "device = torch.device(device_name)\n",
+    "model = AutoModelForCausalLM.from_pretrained(\n",
+    "    model_name, \n",
+    "    quantization_config=bnb_config,\n",
+    "    device_map=device\n",
+    ")\n",
+    "# Reuse the tokenizer's pad token for generation\n",
+    "model.generation_config.pad_token_id = tokenizer.pad_token_id\n",
+    "\n",
+    "# Define LoRA configuration\n",
+    "lora_config = LoraConfig(\n",
+    "    r=16,  # Rank of LoRA matrices (adjust for memory vs. accuracy)\n",
+    "    lora_alpha=32,  # Scaling factor\n",
+    "    lora_dropout=0.1,  # Dropout for regularization\n",
+    "    bias=\"none\",\n",
+    "    task_type=TaskType.CAUSAL_LM,\n",
+    "    # Adapters are attached to the attention projections only\n",
+    "    target_modules=[\n",
+    "        \"q_proj\",\n",
+    "        \"k_proj\",\n",
+    "        \"v_proj\",\n",
+    "        \"o_proj\"\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "# Wrap model with LoRA adapters.\n",
+    "# No model.to(device) afterwards: the quantized weights were already placed\n",
+    "# by device_map above, and moving a bitsandbytes 8-bit model with .to() is\n",
+    "# not supported by transformers.\n",
+    "model = get_peft_model(model, lora_config)\n",
+    "model.print_trainable_parameters()  # Show trainable parameters count"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Define prompt for model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_prompt = \"\"\"You are an AI assistant that converts natural language queries into valid SQLite queries.\n",
+    "Database Schema and Explanations\n",
+    "\n",
+    "team Table\n",
+    "Stores information about NBA teams.\n",
+    "CREATE TABLE IF NOT EXISTS \"team\" (\n",
+    "  \"id\" TEXT PRIMARY KEY,      -- Unique identifier for the team\n",
+    "  \"full_name\" TEXT,           -- Full official name of the team (e.g., \"Los Angeles Lakers\")\n",
+    "  \"abbreviation\" TEXT,        -- Shortened team name (e.g., \"LAL\")\n",
+    "  \"nickname\" TEXT,            -- Commonly used nickname for the team (e.g., \"Lakers\")\n",
+    "  \"city\" TEXT,                -- City where the team is based\n",
+    "  \"state\" TEXT,               -- State where the team is located\n",
+    "  \"year_founded\" REAL         -- Year the team was established\n",
+    ");\n",
+    "\n",
+    "game Table\n",
+    "Contains detailed statistics for each NBA game, including home and away team performance.\n",
+    "CREATE TABLE IF NOT EXISTS \"game\" (\n",
+    "  \"season_id\" TEXT,            -- Season identifier, formatted as \"2YYYY\" (e.g., \"21970\" for the 1970 season)\n",
+    "  \"team_id_home\" TEXT,         -- ID of the home team (matches \"id\" in team table)\n",
+    "  \"team_abbreviation_home\" TEXT, -- Abbreviation of the home team\n",
+    "  \"team_name_home\" TEXT,       -- Full name of the home team\n",
+    "  \"game_id\" TEXT PRIMARY KEY,  -- Unique identifier for the game\n",
+    "  \"game_date\" TIMESTAMP,       -- Date the game was played (YYYY-MM-DD format)\n",
+    "  \"matchup_home\" TEXT,         -- Matchup details including opponent (e.g., \"LAL vs. BOS\")\n",
+    "  \"wl_home\" TEXT,              -- \"W\" if the home team won, \"L\" if they lost\n",
+    "  \"min\" INTEGER,               -- Total minutes played in the game\n",
+    "  \"fgm_home\" REAL,             -- Field goals made by the home team\n",
+    "  \"fga_home\" REAL,             -- Field goals attempted by the home team\n",
+    "  \"fg_pct_home\" REAL,          -- Field goal percentage of the home team\n",
+    "  \"fg3m_home\" REAL,            -- Three-point field goals made by the home team\n",
+    "  \"fg3a_home\" REAL,            -- Three-point attempts by the home team\n",
+    "  \"fg3_pct_home\" REAL,         -- Three-point field goal percentage of the home team\n",
+    "  \"ftm_home\" REAL,             -- Free throws made by the home team\n",
+    "  \"fta_home\" REAL,             -- Free throws attempted by the home team\n",
+    "  \"ft_pct_home\" REAL,          -- Free throw percentage of the home team\n",
+    "  \"oreb_home\" REAL,            -- Offensive rebounds by the home team\n",
+    "  \"dreb_home\" REAL,            -- Defensive rebounds by the home team\n",
+    "  \"reb_home\" REAL,             -- Total rebounds by the home team\n",
+    "  \"ast_home\" REAL,             -- Assists by the home team\n",
+    "  \"stl_home\" REAL,             -- Steals by the home team\n",
+    "  \"blk_home\" REAL,             -- Blocks by the home team\n",
+    "  \"tov_home\" REAL,             -- Turnovers by the home team\n",
+    "  \"pf_home\" REAL,              -- Personal fouls by the home team\n",
+    "  \"pts_home\" REAL,             -- Total points scored by the home team\n",
+    "  \"plus_minus_home\" INTEGER,   -- Plus/minus rating for the home team\n",
+    "  \"video_available_home\" INTEGER, -- Indicates whether video is available (1 = Yes, 0 = No)\n",
+    "  \"team_id_away\" TEXT,         -- ID of the away team\n",
+    "  \"team_abbreviation_away\" TEXT, -- Abbreviation of the away team\n",
+    "  \"team_name_away\" TEXT,       -- Full name of the away team\n",
+    "  \"matchup_away\" TEXT,         -- Matchup details from the away team’s perspective\n",
+    "  \"wl_away\" TEXT,              -- \"W\" if the away team won, \"L\" if they lost\n",
+    "  \"fgm_away\" REAL,             -- Field goals made by the away team\n",
+    "  \"fga_away\" REAL,             -- Field goals attempted by the away team\n",
+    "  \"fg_pct_away\" REAL,          -- Field goal percentage of the away team\n",
+    "  \"fg3m_away\" REAL,            -- Three-point field goals made by the away team\n",
+    "  \"fg3a_away\" REAL,            -- Three-point attempts by the away team\n",
+    "  \"fg3_pct_away\" REAL,         -- Three-point field goal percentage of the away team\n",
+    "  \"ftm_away\" REAL,             -- Free throws made by the away team\n",
+    "  \"fta_away\" REAL,             -- Free throws attempted by the away team\n",
+    "  \"ft_pct_away\" REAL,          -- Free throw percentage of the away team\n",
+    "  \"oreb_away\" REAL,            -- Offensive rebounds by the away team\n",
+    "  \"dreb_away\" REAL,            -- Defensive rebounds by the away team\n",
+    "  \"reb_away\" REAL,             -- Total rebounds by the away team\n",
+    "  \"ast_away\" REAL,             -- Assists by the away team\n",
+    "  \"stl_away\" REAL,             -- Steals by the away team\n",
+    "  \"blk_away\" REAL,             -- Blocks by the away team\n",
+    "  \"tov_away\" REAL,             -- Turnovers by the away team\n",
+    "  \"pf_away\" REAL,              -- Personal fouls by the away team\n",
+    "  \"pts_away\" REAL,             -- Total points scored by the away team\n",
+    "  \"plus_minus_away\" INTEGER,   -- Plus/minus rating for the away team\n",
+    "  \"video_available_away\" INTEGER, -- Indicates whether video is available (1 = Yes, 0 = No)\n",
+    "  \"season_type\" TEXT           -- Regular season or playoffs\n",
+    ");\n",
+    "\n",
+    "other_stats Table\n",
+    "Stores additional statistics, linked to the game table via game_id.\n",
+    "CREATE TABLE IF NOT EXISTS \"other_stats\" (\n",
+    "  \"game_id\" TEXT,             -- Unique game identifier, matches game_id column from game table\n",
+    "  \"league_id\" TEXT,           -- League identifier\n",
+    "  \"team_id_home\" TEXT,        -- Home team identifier\n",
+    "  \"team_abbreviation_home\" TEXT, -- Home team abbreviation\n",
+    "  \"team_city_home\" TEXT,      -- Home team city\n",
+    "  \"pts_paint_home\" INTEGER,   -- Points in the paint by the home team\n",
+    "  \"pts_2nd_chance_home\" INTEGER, -- Second chance points by the home team\n",
+    "  \"pts_fb_home\" INTEGER,      -- Fast break points by the home team\n",
+    "  \"largest_lead_home\" INTEGER,-- Largest lead by the home team\n",
+    "  \"lead_changes\" INTEGER,     -- Number of lead changes \n",
+    "  \"times_tied\" INTEGER,       -- Number of times the score was tied\n",
+    "  \"team_turnovers_home\" INTEGER, -- Home team turnovers\n",
+    "  \"total_turnovers_home\" INTEGER, -- Total turnovers by the home team\n",
+    "  \"team_rebounds_home\" INTEGER, -- Home team rebounds\n",
+    "  \"pts_off_to_home\" INTEGER,  -- Points off turnovers by the home team\n",
+    "  \"team_id_away\" TEXT,        -- Away team identifier\n",
+    "  \"team_abbreviation_away\" TEXT,  -- Away team abbreviation\n",
+    "  \"pts_paint_away\" INTEGER,   -- Points in the paint by the away team\n",
+    "  \"pts_2nd_chance_away\" INTEGER, -- Second chance points by the away team\n",
+    "  \"pts_fb_away\" INTEGER,      -- Fast break points by the away team\n",
+    "  \"largest_lead_away\" INTEGER,-- Largest lead by the away team\n",
+    "  \"team_turnovers_away\" INTEGER, -- Away team turnovers\n",
+    "  \"total_turnovers_away\" INTEGER, -- Total turnovers by the away team\n",
+    "  \"team_rebounds_away\" INTEGER, -- Away team rebounds\n",
+    "  \"pts_off_to_away\" INTEGER   -- Points off turnovers by the away team\n",
+    ");\n",
+    "\n",
+    "\n",
+    "Team Name Information\n",
+    "In the plaintext user questions, only the full team names will be used, but in the queries you may use the full team names or the abbreviations. \n",
+    "The full team names can be used with the game table, while the abbreviations should be used with the other_stats table.\n",
+    "Notice they are separated by the | character in the following list:\n",
+    "\n",
+    "Atlanta Hawks|ATL\n",
+    "Boston Celtics|BOS\n",
+    "Cleveland Cavaliers|CLE\n",
+    "New Orleans Pelicans|NOP\n",
+    "Chicago Bulls|CHI\n",
+    "Dallas Mavericks|DAL\n",
+    "Denver Nuggets|DEN\n",
+    "Golden State Warriors|GSW\n",
+    "Houston Rockets|HOU\n",
+    "Los Angeles Clippers|LAC\n",
+    "Los Angeles Lakers|LAL\n",
+    "Miami Heat|MIA\n",
+    "Milwaukee Bucks|MIL\n",
+    "Minnesota Timberwolves|MIN\n",
+    "Brooklyn Nets|BKN\n",
+    "New York Knicks|NYK\n",
+    "Orlando Magic|ORL\n",
+    "Indiana Pacers|IND\n",
+    "Philadelphia 76ers|PHI\n",
+    "Phoenix Suns|PHX\n",
+    "Portland Trail Blazers|POR\n",
+    "Sacramento Kings|SAC\n",
+    "San Antonio Spurs|SAS\n",
+    "Oklahoma City Thunder|OKC\n",
+    "Toronto Raptors|TOR\n",
+    "Utah Jazz|UTA\n",
+    "Memphis Grizzlies|MEM\n",
+    "Washington Wizards|WAS\n",
+    "Detroit Pistons|DET\n",
+    "Charlotte Hornets|CHA\n",
+    "\n",
+    "Query Guidelines\n",
+    "Use team_name_home and team_name_away to match teams to the game table. Use team_abbreviation_home and team_abbreviation_away to match teams to the other_stats table.\n",
+    "\n",
+    "To filter by season, use season_id = '2YYYY'.\n",
+    "\n",
+    "Example: To get statistics from 2005, use a statement like: season_id = '22005'. To get statistics from 1972, use a statement like: season_id = \"21972\". To get statistics from 2015, use a statement like: season_id = \"22015\".\n",
+    "\n",
+    "Ensure queries return relevant columns and avoid unnecessary joins.\n",
+    "\n",
+    "Example User Requests and SQLite Queries\n",
+    "Request:\n",
+    "\"What is the most points the Los Angeles Lakers have ever scored at home?\"\n",
+    "SQLite:\n",
+    "SELECT MAX(pts_home) \n",
+    "FROM game \n",
+    "WHERE team_name_home = 'Los Angeles Lakers';\n",
+    "\n",
+    "Request:\n",
+    "\"Which teams are located in the state of California?\"\n",
+    "SQLite:\n",
+    "SELECT full_name FROM team WHERE state = 'California';\n",
+    "\n",
+    "Request:\n",
+    "\"Which team had the highest number of team turnovers in an away game?\"\n",
+    "SQLite:\n",
+    "SELECT team_abbreviation_away FROM other_stats ORDER BY team_turnovers_away DESC LIMIT 1;\n",
+    "\n",
+    "Request:\n",
+    "\"Which teams were founded before 1979?\"\n",
+    "SQLite:\n",
+    "SELECT full_name FROM team WHERE year_founded < 1979;\n",
+    "\n",
+    "Request:\n",
+    "\"Find the Boston Celtics largest home victory margin in the 2008 season.\"\n",
+    "SQLite:\n",
+    "SELECT MAX(pts_home - pts_away) AS biggest_win\n",
+    "FROM game\n",
+    "WHERE team_name_home = 'Boston Celtics' AND season_id = '22008';\n",
+    "\n",
+    "Generate only the SQLite query prefaced by SQLite: and no other text, do not output an explanation of the query. Now generate an SQLite query for the following user request. Request:\n",
+    "\"\"\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup model trainer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\training_args.py:1611: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\Dean\\AppData\\Local\\Temp\\ipykernel_12256\\3557190339.py:17: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Trainer.__init__`. Use `processing_class` instead.\n",
+      "  trainer = Trainer(\n",
+      "No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.\n"
+     ]
+    }
+   ],
+   "source": [
+    "training_args = TrainingArguments(\n",
+    "    output_dir=\"./fine-tuned-model\",\n",
+    "    eval_strategy=\"epoch\",  # Evaluate at the end of each epoch (replaces deprecated evaluation_strategy)\n",
+    "    save_strategy=\"epoch\",  # Save model every epoch\n",
+    "    per_device_train_batch_size=8,  # LoRA allows higher batch size\n",
+    "    per_device_eval_batch_size=8,\n",
+    "    num_train_epochs=3,  # Increase if needed\n",
+    "    learning_rate=5e-4,  # Higher LR since we're only training LoRA layers\n",
+    "    weight_decay=0.01,\n",
+    "    logging_steps=50,  # Print loss every 50 steps\n",
+    "    save_total_limit=2,  # Keep last 2 checkpoints\n",
+    "    fp16=torch.cuda.is_available(),  # Mixed precision only when a GPU is present\n",
+    "    # Trainer cannot infer label names through the PEFT wrapper, so name the\n",
+    "    # label column explicitly; otherwise no validation loss is reported\n",
+    "    label_names=[\"labels\"],\n",
+    "    # Only the loss is needed at eval time; skip collecting full-vocabulary logits\n",
+    "    prediction_loss_only=True,\n",
+    "    push_to_hub=False\n",
+    ")\n",
+    "\n",
+    "# Trainer setup\n",
+    "trainer = Trainer(\n",
+    "    model=model,\n",
+    "    args=training_args,\n",
+    "    train_dataset=train_dataset,\n",
+    "    eval_dataset=val_dataset,\n",
+    "    processing_class=tokenizer  # replaces the deprecated tokenizer= argument\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Run fine-tuning and save model weights when complete"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\integrations\\sdpa_attention.py:54: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\actions-runner\\_work\\pytorch\\pytorch\\builder\\windows\\pytorch\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:555.)\n",
+      "  attn_output = torch.nn.functional.scaled_dot_product_attention(\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "\n",
+       "    <div>\n",
+       "      \n",
+       "      <progress value='6' max='354' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
+       "      [  6/354 00:03 < 05:16, 1.10 it/s, Epoch 0.04/3]\n",
+       "    </div>\n",
+       "    <table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       " <tr style=\"text-align: left;\">\n",
+       "      <th>Epoch</th>\n",
+       "      <th>Training Loss</th>\n",
+       "      <th>Validation Loss</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "  </tbody>\n",
+       "</table><p>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "ename": "KeyboardInterrupt",
+     "evalue": "",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[5], line 2\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;66;03m# Run training\u001b[39;00m\n\u001b[1;32m----> 2\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m      4\u001b[0m \u001b[38;5;66;03m# Save model and tokenizer weights\u001b[39;00m\n\u001b[0;32m      5\u001b[0m model\u001b[38;5;241m.\u001b[39msave_pretrained(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m./fine-tuned-model\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\trainer.py:2245\u001b[0m, in \u001b[0;36mTrainer.train\u001b[1;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[0;32m   2243\u001b[0m         hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[0;32m   2244\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m-> 2245\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   2246\u001b[0m \u001b[43m        \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2247\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2248\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2249\u001b[0m \u001b[43m        \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2250\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\Dean\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\transformers\\trainer.py:2561\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[1;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[0;32m   2555\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m context():\n\u001b[0;32m   2556\u001b[0m     tr_loss_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtraining_step(model, inputs, num_items_in_batch)\n\u001b[0;32m   2558\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m   2559\u001b[0m     args\u001b[38;5;241m.\u001b[39mlogging_nan_inf_filter\n\u001b[0;32m   2560\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_torch_xla_available()\n\u001b[1;32m-> 2561\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m (torch\u001b[38;5;241m.\u001b[39misnan(tr_loss_step) \u001b[38;5;129;01mor\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misinf\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtr_loss_step\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m   2562\u001b[0m ):\n\u001b[0;32m   2563\u001b[0m     \u001b[38;5;66;03m# if loss is nan or inf simply add the average of previous logged losses\u001b[39;00m\n\u001b[0;32m   2564\u001b[0m     tr_loss \u001b[38;5;241m=\u001b[39m tr_loss \u001b[38;5;241m+\u001b[39m tr_loss \u001b[38;5;241m/\u001b[39m (\u001b[38;5;241m1\u001b[39m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mglobal_step \u001b[38;5;241m-\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_globalstep_last_logged)\n\u001b[0;32m   2565\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
+     ]
+    }
+   ],
+   "source": [
+    "# Fine-tune the LoRA adapters using the trainer configured above\n",
+    "trainer.train()\n",
+    "\n",
+    "# Write the trained model weights and the tokenizer files to the same folder\n",
+    "for artifact in (model, tokenizer):\n",
+    "    artifact.save_pretrained(\"./fine-tuned-model\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}