{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "import mediapipe as mp\n",
    "import cv2\n",
    "import pandas as pd\n",
    "import pickle\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.calibration import CalibratedClassifierCV\n",
    "from sklearn.linear_model import LogisticRegression, SGDClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "\n",
    "from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix\n",
    "\n",
    "import warnings\n",
    "# NOTE(review): with no category given this silences every warning for the whole kernel,\n",
    "# including any raised by the cells below.\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Drawing helpers: short aliases for the MediaPipe drawing utilities and pose solution modules\n",
    "mp_drawing = mp.solutions.drawing_utils\n",
    "mp_pose = mp.solutions.pose"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Train Model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.1. Describe data and split dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rescale_frame(frame, percent=50):\n",
    "    '''\n",
    "    Resize `frame` so that its width and height are `percent` percent of the original.\n",
    "\n",
    "    The target size is truncated to whole pixels and the resize uses area interpolation.\n",
    "    '''\n",
    "    new_width = int(frame.shape[1] * percent / 100)\n",
    "    new_height = int(frame.shape[0] * percent / 100)\n",
    "    # cv2.resize expects the target size as (width, height)\n",
    "    return cv2.resize(frame, (new_width, new_height), interpolation=cv2.INTER_AREA)\n",
    "\n",
    "\n",
    "def describe_dataset(dataset_path: str):\n",
    "    '''\n",
    "    Load a CSV dataset, print a short summary of it and return it.\n",
    "\n",
    "    The summary lists the column headers, the row and column counts, the\n",
    "    per-class counts of the `label` column, whether any value is missing and\n",
    "    the number of duplicated rows.\n",
    "\n",
    "    Args:\n",
    "        dataset_path: path of a CSV file that contains a `label` column.\n",
    "\n",
    "    Returns:\n",
    "        The loaded pandas DataFrame, unmodified.\n",
    "    '''\n",
    "\n",
    "    data = pd.read_csv(dataset_path)\n",
    "    print(f\"Headers: {list(data.columns.values)}\")\n",
    "    print(f'Number of rows: {data.shape[0]} \\nNumber of columns: {data.shape[1]}\\n')\n",
    "    print(f\"Labels: \\n{data['label'].value_counts()}\\n\")\n",
    "    print(f\"Missing values: {data.isnull().values.any()}\\n\")\n",
    "\n",
    "    # Count duplicated rows straight from the boolean mask. Summing the row values\n",
    "    # of the duplicated slice is unnecessary and may fail on the string `label` column.\n",
    "    duplicate_count = int(data.duplicated().sum())\n",
    "    print(f\"Duplicate Rows : {duplicate_count}\")\n",
    "\n",
    "    return data\n",
    "\n",
    "\n",
    "def round_up_metric_results(results) -> list:\n",
    "    '''Return the metric values (precision, recall, ...) rounded to 3 decimal places.'''\n",
    "    return [round(value, 3) for value in results]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']\n",
      "Number of rows: 28520 \n",
      "Number of columns: 69\n",
      "\n",
      "Labels: \n",
      "C    9904\n",
      "L    9546\n",
      "H    9070\n",
      "Name: label, dtype: int64\n",
      "\n",
      "Missing values: False\n",
      "\n",
      "Duplicate Rows : 0\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>label</th>\n",
       "      <th>nose_x</th>\n",
       "      <th>nose_y</th>\n",
       "      <th>nose_z</th>\n",
       "      <th>nose_v</th>\n",
       "      <th>left_shoulder_x</th>\n",
       "      <th>left_shoulder_y</th>\n",
       "      <th>left_shoulder_z</th>\n",
       "      <th>left_shoulder_v</th>\n",
       "      <th>right_shoulder_x</th>\n",
       "      <th>...</th>\n",
       "      <th>right_heel_z</th>\n",
       "      <th>right_heel_v</th>\n",
       "      <th>left_foot_index_x</th>\n",
       "      <th>left_foot_index_y</th>\n",
       "      <th>left_foot_index_z</th>\n",
       "      <th>left_foot_index_v</th>\n",
       "      <th>right_foot_index_x</th>\n",
       "      <th>right_foot_index_y</th>\n",
       "      <th>right_foot_index_z</th>\n",
       "      <th>right_foot_index_v</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>28517</th>\n",
       "      <td>1</td>\n",
       "      <td>0.735630</td>\n",
       "      <td>0.543294</td>\n",
       "      <td>0.007467</td>\n",
       "      <td>0.999246</td>\n",
       "      <td>0.695831</td>\n",
       "      <td>0.417349</td>\n",
       "      <td>0.155194</td>\n",
       "      <td>0.995723</td>\n",
       "      <td>0.720067</td>\n",
       "      <td>...</td>\n",
       "      <td>0.086010</td>\n",
       "      <td>0.966131</td>\n",
       "      <td>0.226601</td>\n",
       "      <td>0.598075</td>\n",
       "      <td>0.219305</td>\n",
       "      <td>0.470830</td>\n",
       "      <td>0.220079</td>\n",
       "      <td>0.614120</td>\n",
       "      <td>0.026265</td>\n",
       "      <td>0.934942</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28518</th>\n",
       "      <td>1</td>\n",
       "      <td>0.775572</td>\n",
       "      <td>0.517579</td>\n",
       "      <td>0.012821</td>\n",
       "      <td>0.999378</td>\n",
       "      <td>0.704168</td>\n",
       "      <td>0.404210</td>\n",
       "      <td>0.162908</td>\n",
       "      <td>0.995909</td>\n",
       "      <td>0.730823</td>\n",
       "      <td>...</td>\n",
       "      <td>0.070911</td>\n",
       "      <td>0.967070</td>\n",
       "      <td>0.238810</td>\n",
       "      <td>0.610591</td>\n",
       "      <td>0.198591</td>\n",
       "      <td>0.496140</td>\n",
       "      <td>0.228907</td>\n",
       "      <td>0.625559</td>\n",
       "      <td>0.018591</td>\n",
       "      <td>0.938905</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28519</th>\n",
       "      <td>1</td>\n",
       "      <td>0.790600</td>\n",
       "      <td>0.498958</td>\n",
       "      <td>0.007789</td>\n",
       "      <td>0.999467</td>\n",
       "      <td>0.710651</td>\n",
       "      <td>0.394019</td>\n",
       "      <td>0.164441</td>\n",
       "      <td>0.996123</td>\n",
       "      <td>0.736771</td>\n",
       "      <td>...</td>\n",
       "      <td>0.085872</td>\n",
       "      <td>0.967943</td>\n",
       "      <td>0.238197</td>\n",
       "      <td>0.609329</td>\n",
       "      <td>0.233198</td>\n",
       "      <td>0.510583</td>\n",
       "      <td>0.227823</td>\n",
       "      <td>0.626068</td>\n",
       "      <td>0.036127</td>\n",
       "      <td>0.940917</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>3 rows × 69 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      label    nose_x    nose_y    nose_z    nose_v  left_shoulder_x  \\\n",
       "28517     1  0.735630  0.543294  0.007467  0.999246         0.695831   \n",
       "28518     1  0.775572  0.517579  0.012821  0.999378         0.704168   \n",
       "28519     1  0.790600  0.498958  0.007789  0.999467         0.710651   \n",
       "\n",
       "       left_shoulder_y  left_shoulder_z  left_shoulder_v  right_shoulder_x  \\\n",
       "28517         0.417349         0.155194         0.995723          0.720067   \n",
       "28518         0.404210         0.162908         0.995909          0.730823   \n",
       "28519         0.394019         0.164441         0.996123          0.736771   \n",
       "\n",
       "       ...  right_heel_z  right_heel_v  left_foot_index_x  left_foot_index_y  \\\n",
       "28517  ...      0.086010      0.966131           0.226601           0.598075   \n",
       "28518  ...      0.070911      0.967070           0.238810           0.610591   \n",
       "28519  ...      0.085872      0.967943           0.238197           0.609329   \n",
       "\n",
       "       left_foot_index_z  left_foot_index_v  right_foot_index_x  \\\n",
       "28517           0.219305           0.470830            0.220079   \n",
       "28518           0.198591           0.496140            0.228907   \n",
       "28519           0.233198           0.510583            0.227823   \n",
       "\n",
       "       right_foot_index_y  right_foot_index_z  right_foot_index_v  \n",
       "28517            0.614120            0.026265            0.934942  \n",
       "28518            0.625559            0.018591            0.938905  \n",
       "28519            0.626068            0.036127            0.940917  \n",
       "\n",
       "[3 rows x 69 columns]"
      ]
     },
     "execution_count": 56,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = describe_dataset(\"./train.csv\")\n",
    "\n",
    "# Encode the class letters as integers: C -> 0, H -> 1, L -> 2\n",
    "for letter, code in ((\"C\", 0), (\"H\", 1), (\"L\", 2)):\n",
    "    df.loc[df[\"label\"] == letter, \"label\"] = code\n",
    "\n",
    "df.tail(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate the integer class vector from the landmark feature columns\n",
    "y = df[\"label\"].astype(\"int\")\n",
    "X = df.drop(columns=\"label\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standardise every feature column; `sc` is reused to transform the test set and is pickled at the end.\n",
    "# NOTE(review): the scaler is fitted on all of X before train_test_split runs, so the held-out\n",
    "# split contributes to the scaling statistics - consider fitting on X_train only.\n",
    "sc = StandardScaler()\n",
    "# Wrapping the scaled array in a new DataFrame replaces the column names with integer positions.\n",
    "X = pd.DataFrame(sc.fit_transform(X))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1469    0\n",
       "292     0\n",
       "1568    0\n",
       "Name: label, dtype: int64"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable.\n",
    "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)\n",
    "y_test.head(3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.2. Train models using Scikit-Learn and evaluate on the held-out split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed every estimator that draws random numbers so that Restart & Run All\n",
    "# reproduces the same fitted models and the same scores.\n",
    "SEED = 1234\n",
    "# Integer class codes produced by the label encoding (C -> 0, H -> 1, L -> 2)\n",
    "CLASS_LABELS = [0, 1, 2]\n",
    "\n",
    "algorithms = [\n",
    "    (\"LR\", LogisticRegression()),\n",
    "    (\"SVC\", SVC(probability=True, random_state=SEED)),\n",
    "    (\"KNN\", KNeighborsClassifier()),\n",
    "    (\"DTC\", DecisionTreeClassifier(random_state=SEED)),\n",
    "    # SGDClassifier has no predict_proba with its default loss, hence the calibration wrapper\n",
    "    (\"SGDC\", CalibratedClassifierCV(SGDClassifier(random_state=SEED))),\n",
    "    (\"NB\", GaussianNB()),\n",
    "    (\"RF\", RandomForestClassifier(random_state=SEED)),\n",
    "]\n",
    "\n",
    "models = {}\n",
    "final_results = []\n",
    "\n",
    "for name, model in algorithms:\n",
    "    # fit() returns the fitted estimator, which is kept for the test-set evaluation and for pickling\n",
    "    trained_model = model.fit(X_train, y_train)\n",
    "    models[name] = trained_model\n",
    "\n",
    "    # Evaluate on the held-out split produced by train_test_split\n",
    "    model_results = model.predict(X_test)\n",
    "\n",
    "    # average=None yields one score per class, in CLASS_LABELS order\n",
    "    p_score = precision_score(y_test, model_results, average=None, labels=CLASS_LABELS)\n",
    "    a_score = accuracy_score(y_test, model_results)\n",
    "    r_score = recall_score(y_test, model_results, average=None, labels=CLASS_LABELS)\n",
    "    f1_score_result = f1_score(y_test, model_results, average=None, labels=CLASS_LABELS)\n",
    "    cm = confusion_matrix(y_test, model_results, labels=CLASS_LABELS)\n",
    "    final_results.append((name, round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Precision Score</th>\n",
       "      <th>Accuracy score</th>\n",
       "      <th>Recall Score</th>\n",
       "      <th>F1 score</th>\n",
       "      <th>Confusion Matrix</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>KNN</td>\n",
       "      <td>[0.999, 1.0, 1.0]</td>\n",
       "      <td>0.999825</td>\n",
       "      <td>[1.0, 1.0, 0.999]</td>\n",
       "      <td>[1.0, 1.0, 1.0]</td>\n",
       "      <td>[[1915, 0, 0], [0, 1844, 0], [1, 0, 1944]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>LR</td>\n",
       "      <td>[0.999, 1.0, 0.999]</td>\n",
       "      <td>0.999649</td>\n",
       "      <td>[0.999, 1.0, 0.999]</td>\n",
       "      <td>[0.999, 1.0, 0.999]</td>\n",
       "      <td>[[1914, 0, 1], [0, 1844, 0], [1, 0, 1944]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SVC</td>\n",
       "      <td>[0.998, 1.0, 0.999]</td>\n",
       "      <td>0.999299</td>\n",
       "      <td>[0.999, 1.0, 0.998]</td>\n",
       "      <td>[0.999, 1.0, 0.999]</td>\n",
       "      <td>[[1914, 0, 1], [0, 1844, 0], [3, 0, 1942]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>RF</td>\n",
       "      <td>[0.998, 1.0, 1.0]</td>\n",
       "      <td>0.999474</td>\n",
       "      <td>[1.0, 0.999, 0.999]</td>\n",
       "      <td>[0.999, 1.0, 0.999]</td>\n",
       "      <td>[[1915, 0, 0], [1, 1843, 0], [2, 0, 1943]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SGDC</td>\n",
       "      <td>[0.999, 0.998, 0.999]</td>\n",
       "      <td>0.998597</td>\n",
       "      <td>[0.997, 1.0, 0.999]</td>\n",
       "      <td>[0.998, 0.999, 0.999]</td>\n",
       "      <td>[[1909, 4, 2], [0, 1844, 0], [2, 0, 1943]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>DTC</td>\n",
       "      <td>[0.994, 1.0, 0.999]</td>\n",
       "      <td>0.997721</td>\n",
       "      <td>[0.999, 0.998, 0.995]</td>\n",
       "      <td>[0.997, 0.999, 0.997]</td>\n",
       "      <td>[[1914, 0, 1], [3, 1841, 0], [9, 0, 1936]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NB</td>\n",
       "      <td>[0.816, 0.931, 0.941]</td>\n",
       "      <td>0.892532</td>\n",
       "      <td>[0.883, 0.951, 0.847]</td>\n",
       "      <td>[0.848, 0.941, 0.892]</td>\n",
       "      <td>[[1690, 122, 103], [91, 1753, 0], [290, 7, 1648]]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Model        Precision Score  Accuracy score           Recall Score  \\\n",
       "0   KNN      [0.999, 1.0, 1.0]        0.999825      [1.0, 1.0, 0.999]   \n",
       "1    LR    [0.999, 1.0, 0.999]        0.999649    [0.999, 1.0, 0.999]   \n",
       "2   SVC    [0.998, 1.0, 0.999]        0.999299    [0.999, 1.0, 0.998]   \n",
       "3    RF      [0.998, 1.0, 1.0]        0.999474    [1.0, 0.999, 0.999]   \n",
       "4  SGDC  [0.999, 0.998, 0.999]        0.998597    [0.997, 1.0, 0.999]   \n",
       "5   DTC    [0.994, 1.0, 0.999]        0.997721  [0.999, 0.998, 0.995]   \n",
       "6    NB  [0.816, 0.931, 0.941]        0.892532  [0.883, 0.951, 0.847]   \n",
       "\n",
       "                F1 score                                   Confusion Matrix  \n",
       "0        [1.0, 1.0, 1.0]         [[1915, 0, 0], [0, 1844, 0], [1, 0, 1944]]  \n",
       "1    [0.999, 1.0, 0.999]         [[1914, 0, 1], [0, 1844, 0], [1, 0, 1944]]  \n",
       "2    [0.999, 1.0, 0.999]         [[1914, 0, 1], [0, 1844, 0], [3, 0, 1942]]  \n",
       "3    [0.999, 1.0, 0.999]         [[1915, 0, 0], [1, 1843, 0], [2, 0, 1943]]  \n",
       "4  [0.998, 0.999, 0.999]         [[1909, 4, 2], [0, 1844, 0], [2, 0, 1943]]  \n",
       "5  [0.997, 0.999, 0.997]         [[1914, 0, 1], [3, 1841, 0], [9, 0, 1936]]  \n",
       "6  [0.848, 0.941, 0.892]  [[1690, 122, 103], [91, 1753, 0], [290, 7, 1648]]  "
      ]
     },
     "execution_count": 67,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Rank the models by the sum of their per-class F1 scores (index 4 of each result tuple), best first\n",
    "final_results.sort(key=lambda row: sum(row[4]), reverse=True)\n",
    "\n",
    "result_columns = [\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"]\n",
    "pd.DataFrame(final_results, columns=result_columns)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.3. Test set evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']\n",
      "Number of rows: 710 \n",
      "Number of columns: 69\n",
      "\n",
      "Labels: \n",
      "H    241\n",
      "L    235\n",
      "C    234\n",
      "Name: label, dtype: int64\n",
      "\n",
      "Missing values: False\n",
      "\n",
      "Duplicate Rows : 0\n"
     ]
    }
   ],
   "source": [
    "test_df = describe_dataset(\"./test.csv\")\n",
    "# Shuffle the rows and rebuild a clean 0..n-1 index\n",
    "test_df = test_df.sample(frac=1).reset_index(drop=True)\n",
    "\n",
    "# Same integer encoding as the training labels: C -> 0, H -> 1, L -> 2\n",
    "for letter, code in ((\"C\", 0), (\"H\", 1), (\"L\", 2)):\n",
    "    test_df.loc[test_df[\"label\"] == letter, \"label\"] = code\n",
    "\n",
    "test_y = test_df[\"label\"].astype(\"int\")\n",
    "test_x = test_df.drop(columns=\"label\")\n",
    "\n",
    "# Reuse the scaler fitted earlier (transform only, no refit)\n",
    "test_x = pd.DataFrame(sc.transform(test_x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Precision Score</th>\n",
       "      <th>Accuracy score</th>\n",
       "      <th>Recall Score</th>\n",
       "      <th>F1 score</th>\n",
       "      <th>Confusion Matrix</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>LR</td>\n",
       "      <td>[0.987, 1.0, 1.0]</td>\n",
       "      <td>0.995775</td>\n",
       "      <td>[1.0, 0.996, 0.991]</td>\n",
       "      <td>[0.994, 0.998, 0.996]</td>\n",
       "      <td>[[234, 0, 0], [1, 240, 0], [2, 0, 233]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>SVC</td>\n",
       "      <td>[0.963, 1.0, 1.0]</td>\n",
       "      <td>0.987324</td>\n",
       "      <td>[1.0, 0.992, 0.97]</td>\n",
       "      <td>[0.981, 0.996, 0.985]</td>\n",
       "      <td>[[234, 0, 0], [2, 239, 0], [7, 0, 228]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SGDC</td>\n",
       "      <td>[0.974, 0.975, 0.996]</td>\n",
       "      <td>0.981690</td>\n",
       "      <td>[0.974, 0.983, 0.987]</td>\n",
       "      <td>[0.974, 0.979, 0.991]</td>\n",
       "      <td>[[228, 6, 0], [3, 237, 1], [3, 0, 232]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>KNN</td>\n",
       "      <td>[0.869, 0.996, 1.0]</td>\n",
       "      <td>0.949296</td>\n",
       "      <td>[0.996, 0.992, 0.86]</td>\n",
       "      <td>[0.928, 0.994, 0.924]</td>\n",
       "      <td>[[233, 1, 0], [2, 239, 0], [33, 0, 202]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>RF</td>\n",
       "      <td>[0.765, 1.0, 1.0]</td>\n",
       "      <td>0.898592</td>\n",
       "      <td>[1.0, 1.0, 0.694]</td>\n",
       "      <td>[0.867, 1.0, 0.819]</td>\n",
       "      <td>[[234, 0, 0], [0, 241, 0], [72, 0, 163]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>NB</td>\n",
       "      <td>[0.892, 0.737, 0.945]</td>\n",
       "      <td>0.842254</td>\n",
       "      <td>[0.632, 0.942, 0.949]</td>\n",
       "      <td>[0.74, 0.827, 0.947]</td>\n",
       "      <td>[[148, 73, 13], [14, 227, 0], [4, 8, 223]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>DTC</td>\n",
       "      <td>[0.69, 1.0, 0.625]</td>\n",
       "      <td>0.767606</td>\n",
       "      <td>[0.543, 0.988, 0.766]</td>\n",
       "      <td>[0.608, 0.994, 0.688]</td>\n",
       "      <td>[[127, 0, 107], [2, 238, 1], [55, 0, 180]]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Model        Precision Score  Accuracy score           Recall Score  \\\n",
       "0    LR      [0.987, 1.0, 1.0]        0.995775    [1.0, 0.996, 0.991]   \n",
       "1   SVC      [0.963, 1.0, 1.0]        0.987324     [1.0, 0.992, 0.97]   \n",
       "2  SGDC  [0.974, 0.975, 0.996]        0.981690  [0.974, 0.983, 0.987]   \n",
       "3   KNN    [0.869, 0.996, 1.0]        0.949296   [0.996, 0.992, 0.86]   \n",
       "4    RF      [0.765, 1.0, 1.0]        0.898592      [1.0, 1.0, 0.694]   \n",
       "5    NB  [0.892, 0.737, 0.945]        0.842254  [0.632, 0.942, 0.949]   \n",
       "6   DTC     [0.69, 1.0, 0.625]        0.767606  [0.543, 0.988, 0.766]   \n",
       "\n",
       "                F1 score                            Confusion Matrix  \n",
       "0  [0.994, 0.998, 0.996]     [[234, 0, 0], [1, 240, 0], [2, 0, 233]]  \n",
       "1  [0.981, 0.996, 0.985]     [[234, 0, 0], [2, 239, 0], [7, 0, 228]]  \n",
       "2  [0.974, 0.979, 0.991]     [[228, 6, 0], [3, 237, 1], [3, 0, 232]]  \n",
       "3  [0.928, 0.994, 0.924]    [[233, 1, 0], [2, 239, 0], [33, 0, 202]]  \n",
       "4    [0.867, 1.0, 0.819]    [[234, 0, 0], [0, 241, 0], [72, 0, 163]]  \n",
       "5   [0.74, 0.827, 0.947]  [[148, 73, 13], [14, 227, 0], [4, 8, 223]]  \n",
       "6  [0.608, 0.994, 0.688]  [[127, 0, 107], [2, 238, 1], [55, 0, 180]]  "
      ]
     },
     "execution_count": 70,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "testset_final_results = []\n",
    "\n",
    "# Shared keyword arguments: one score per class, in the order 0, 1, 2\n",
    "per_class = dict(average=None, labels=[0, 1, 2])\n",
    "\n",
    "for name, model in models.items():\n",
    "    # Score every fitted model on the external test set\n",
    "    model_results = model.predict(test_x)\n",
    "\n",
    "    row = (\n",
    "        name,\n",
    "        round_up_metric_results(precision_score(test_y, model_results, **per_class)),\n",
    "        accuracy_score(test_y, model_results),\n",
    "        round_up_metric_results(recall_score(test_y, model_results, **per_class)),\n",
    "        round_up_metric_results(f1_score(test_y, model_results, **per_class)),\n",
    "        confusion_matrix(test_y, model_results, labels=[0, 1, 2]),\n",
    "    )\n",
    "    testset_final_results.append(row)\n",
    "\n",
    "\n",
    "# Best model first, ranked by the sum of the per-class F1 scores\n",
    "testset_final_results.sort(key=lambda row: sum(row[4]), reverse=True)\n",
    "pd.DataFrame(testset_final_results, columns=[\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.4. Dump models and input scaler using pickle\n",
    "\n",
    "According to the evaluations, several models perform well; the two best on the test set are LR and SVC."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the whole {name: fitted model} dictionary in a single file\n",
    "with open(\"./model/all_sklearn.pkl\", \"wb\") as model_file:\n",
    "    pickle.dump(models, model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted logistic regression model on its own\n",
    "with open(\"./model/LR_model.pkl\", \"wb\") as model_file:\n",
    "    pickle.dump(models[\"LR\"], model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the fitted SVC model on its own.\n",
    "# Re-run this cell after retraining so the file on disk matches the current `models`.\n",
    "with open(\"./model/SVC_model.pkl\", \"wb\") as model_file:\n",
    "    pickle.dump(models[\"SVC\"], model_file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dump input scaler\n",
    "with open(\"./model/input_scaler.pkl\", \"wb\") as f:\n",
    "    pickle.dump(sc, f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.13 (conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "9260f401923fb5c4108c543a7d176de9733d378b3752e49535ad7c43c2271b65"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}