{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "objc[49355]: Class CaptureDelegate is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x108688860) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece480). One of the two will be used. Which one is undefined.\n",
      "objc[49355]: Class CVWindow is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440a68) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece4d0). One of the two will be used. Which one is undefined.\n",
      "objc[49355]: Class CVView is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440a90) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece4f8). One of the two will be used. Which one is undefined.\n",
      "objc[49355]: Class CVSlider is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440ab8) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece520). One of the two will be used. Which one is undefined.\n"
     ]
    }
   ],
   "source": [
    "import mediapipe as mp\n",
    "import cv2\n",
    "import pandas as pd\n",
    "import pickle\n",
    "\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression, SGDClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.naive_bayes import GaussianNB\n",
    "from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.calibration import CalibratedClassifierCV\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "\n",
    "# Drawing helpers\n",
    "mp_drawing = mp.solutions.drawing_utils\n",
    "mp_pose = mp.solutions.pose"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1. Train model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.1. Describe data and split dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def rescale_frame(frame, percent=50):\n",
    "    '''\n",
    "    Rescale a frame to a certain percentage compare to its original frame\n",
    "    '''\n",
    "    width = int(frame.shape[1] * percent/ 100)\n",
    "    height = int(frame.shape[0] * percent/ 100)\n",
    "    dim = (width, height)\n",
    "    return cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)\n",
    "\n",
    "\n",
    "def describe_dataset(dataset_path: str):\n",
    "    '''\n",
    "    Describe dataset\n",
    "    '''\n",
    "\n",
    "    data = pd.read_csv(dataset_path)\n",
    "    print(f\"Headers: {list(data.columns.values)}\")\n",
    "    print(f'Number of rows: {data.shape[0]} \\nNumber of columns: {data.shape[1]}\\n')\n",
    "    print(f\"Labels: \\n{data['label'].value_counts()}\\n\")\n",
    "    print(f\"Missing values: {data.isnull().values.any()}\\n\")\n",
    "    \n",
    "    duplicate = data[data.duplicated()]\n",
    "    print(f\"Duplicate Rows : {len(duplicate.sum(axis=1))}\")\n",
    "\n",
    "    return data\n",
    "\n",
    "\n",
    "def round_up_metric_results(results) -> list:\n",
    "    '''Round up metrics results such as precision score, recall score, ...'''\n",
    "    return list(map(lambda el: round(el, 3), results))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v']\n",
      "Number of rows: 15372 \n",
      "Number of columns: 37\n",
      "\n",
      "Labels: \n",
      "C    8238\n",
      "L    7134\n",
      "Name: label, dtype: int64\n",
      "\n",
      "Missing values: False\n",
      "\n",
      "Duplicate Rows : 0\n"
     ]
    }
   ],
   "source": [
    "# load dataset\n",
    "df = describe_dataset(\"./train.csv\")\n",
    "\n",
    "# Categorizing label\n",
    "df.loc[df[\"label\"] == \"C\", \"label\"] = 0\n",
    "df.loc[df[\"label\"] == \"L\", \"label\"] = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "sc = StandardScaler()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"./model/input_scaler.pkl\", \"rb\") as f:\n",
    "    sc = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Standard Scaling of features\n",
    "x = df.drop(\"label\", axis = 1)\n",
    "x = pd.DataFrame(sc.transform(x))\n",
    "\n",
    "y = df[\"label\"].astype('int')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "9465     1\n",
       "8833     0\n",
       "6190     0\n",
       "7645     0\n",
       "13890    1\n",
       "        ..\n",
       "11468    1\n",
       "7221     1\n",
       "1318     1\n",
       "8915     1\n",
       "11055    1\n",
       "Name: label, Length: 12297, dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1234)\n",
    "y_train"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.2. Train model using Scikit-learn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>Precision Score</th>\n",
       "      <th>Accuracy score</th>\n",
       "      <th>Recall Score</th>\n",
       "      <th>F1 score</th>\n",
       "      <th>Confusion Matrix</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>RF</td>\n",
       "      <td>[0.999, 0.999]</td>\n",
       "      <td>0.999024</td>\n",
       "      <td>[0.999, 0.999]</td>\n",
       "      <td>[0.999, 0.999]</td>\n",
       "      <td>[[1677, 2], [1, 1395]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>KNN</td>\n",
       "      <td>[0.997, 0.999]</td>\n",
       "      <td>0.998049</td>\n",
       "      <td>[0.999, 0.996]</td>\n",
       "      <td>[0.998, 0.998]</td>\n",
       "      <td>[[1678, 1], [5, 1391]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>SVC</td>\n",
       "      <td>[0.997, 0.995]</td>\n",
       "      <td>0.996098</td>\n",
       "      <td>[0.996, 0.996]</td>\n",
       "      <td>[0.996, 0.996]</td>\n",
       "      <td>[[1672, 7], [5, 1391]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>DTC</td>\n",
       "      <td>[0.997, 0.991]</td>\n",
       "      <td>0.994146</td>\n",
       "      <td>[0.992, 0.996]</td>\n",
       "      <td>[0.995, 0.994]</td>\n",
       "      <td>[[1666, 13], [5, 1391]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>SGDC</td>\n",
       "      <td>[0.987, 0.974]</td>\n",
       "      <td>0.981463</td>\n",
       "      <td>[0.979, 0.985]</td>\n",
       "      <td>[0.983, 0.98]</td>\n",
       "      <td>[[1643, 36], [21, 1375]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>LR</td>\n",
       "      <td>[0.986, 0.975]</td>\n",
       "      <td>0.980813</td>\n",
       "      <td>[0.979, 0.983]</td>\n",
       "      <td>[0.982, 0.979]</td>\n",
       "      <td>[[1644, 35], [24, 1372]]</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>NB</td>\n",
       "      <td>[0.927, 0.842]</td>\n",
       "      <td>0.884878</td>\n",
       "      <td>[0.857, 0.918]</td>\n",
       "      <td>[0.89, 0.879]</td>\n",
       "      <td>[[1439, 240], [114, 1282]]</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Model Precision Score  Accuracy score    Recall Score        F1 score  \\\n",
       "0    RF  [0.999, 0.999]        0.999024  [0.999, 0.999]  [0.999, 0.999]   \n",
       "1   KNN  [0.997, 0.999]        0.998049  [0.999, 0.996]  [0.998, 0.998]   \n",
       "2   SVC  [0.997, 0.995]        0.996098  [0.996, 0.996]  [0.996, 0.996]   \n",
       "3   DTC  [0.997, 0.991]        0.994146  [0.992, 0.996]  [0.995, 0.994]   \n",
       "4  SGDC  [0.987, 0.974]        0.981463  [0.979, 0.985]   [0.983, 0.98]   \n",
       "5    LR  [0.986, 0.975]        0.980813  [0.979, 0.983]  [0.982, 0.979]   \n",
       "6    NB  [0.927, 0.842]        0.884878  [0.857, 0.918]   [0.89, 0.879]   \n",
       "\n",
       "             Confusion Matrix  \n",
       "0      [[1677, 2], [1, 1395]]  \n",
       "1      [[1678, 1], [5, 1391]]  \n",
       "2      [[1672, 7], [5, 1391]]  \n",
       "3     [[1666, 13], [5, 1391]]  \n",
       "4    [[1643, 36], [21, 1375]]  \n",
       "5    [[1644, 35], [24, 1372]]  \n",
       "6  [[1439, 240], [114, 1282]]  "
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "algorithms =[(\"LR\", LogisticRegression()),\n",
    "         (\"SVC\", SVC(probability=True)),\n",
    "         ('KNN',KNeighborsClassifier()),\n",
    "         (\"DTC\", DecisionTreeClassifier()),\n",
    "         (\"SGDC\", CalibratedClassifierCV(SGDClassifier())),\n",
    "         (\"NB\", GaussianNB()),\n",
    "         ('RF', RandomForestClassifier()),]\n",
    "\n",
    "models = {}\n",
    "final_results = []\n",
    "\n",
    "for name, model in algorithms:\n",
    "    trained_model = model.fit(X_train, y_train)\n",
    "    models[name] = trained_model\n",
    "\n",
    "    # Evaluate model\n",
    "    model_results = model.predict(X_test)\n",
    "\n",
    "    p_score = precision_score(y_test, model_results, average=None, labels=[0, 1])\n",
    "    a_score = accuracy_score(y_test, model_results)\n",
    "    r_score = recall_score(y_test, model_results, average=None, labels=[0, 1])\n",
    "    f1_score_result = f1_score(y_test, model_results, average=None, labels=[0, 1])\n",
    "    cm = confusion_matrix(y_test, model_results, labels=[0, 1])\n",
    "    final_results.append(( name,  round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))\n",
    "\n",
    "# Sort results by F1 score\n",
    "final_results.sort(key=lambda k: sum(k[4]), reverse=True)\n",
    "pd.DataFrame(final_results, columns=[\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 1.3. Dump models pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"./model/all_sklearn.pkl\", \"wb\") as f:\n",
    "    pickle.dump(models, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"./model/input_scaler.pkl\", \"wb\") as f:\n",
    "    pickle.dump(sc, f)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.13 (conda)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "9260f401923fb5c4108c543a7d176de9733d378b3752e49535ad7c43c2271b65"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}