{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "objc[49355]: Class CaptureDelegate is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x108688860) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece480). One of the two will be used. Which one is undefined.\n", "objc[49355]: Class CVWindow is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440a68) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece4d0). One of the two will be used. Which one is undefined.\n", "objc[49355]: Class CVView is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440a90) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece4f8). One of the two will be used. Which one is undefined.\n", "objc[49355]: Class CVSlider is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440ab8) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece520). One of the two will be used. Which one is undefined.\n" ] } ], "source": [ "import mediapipe as mp\n", "import cv2\n", "import pandas as pd\n", "import pickle\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression, SGDClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.calibration import CalibratedClassifierCV\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "# Drawing helpers\n", "mp_drawing = mp.solutions.drawing_utils\n", "mp_pose = mp.solutions.pose" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Train model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1.1. 
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
 "def rescale_frame(frame, percent=50):\n",
 "    '''\n",
 "    Rescale a frame to a given percentage of its original size\n",
 "    '''\n",
 "    width = int(frame.shape[1] * percent / 100)\n",
 "    height = int(frame.shape[0] * percent / 100)\n",
 "    dim = (width, height)\n",
 "    return cv2.resize(frame, dim, interpolation=cv2.INTER_AREA)\n",
 "\n",
 "\n",
 "def describe_dataset(dataset_path: str):\n",
 "    '''\n",
 "    Describe the dataset: print its columns, shape, label counts, missing values and duplicate rows, then return it as a DataFrame\n",
 "    '''\n",
 "\n",
 "    data = pd.read_csv(dataset_path)\n",
 "    print(f\"Headers: {list(data.columns.values)}\")\n",
 "    print(f'Number of rows: {data.shape[0]} \nNumber of columns: {data.shape[1]}\n')\n",
 "    print(f\"Labels: \n{data['label'].value_counts()}\n\")\n",
 "    print(f\"Missing values: {data.isnull().values.any()}\n\")\n",
 "\n",
 "    duplicate = data[data.duplicated()]\n",
 "    print(f\"Duplicate Rows : {len(duplicate)}\")\n",
 "\n",
 "    return data\n",
 "\n",
 "\n",
 "def round_up_metric_results(results) -> list:\n",
 "    '''Round metric results (precision, recall, ...) to 3 decimal places'''\n",
 "    return list(map(lambda el: round(el, 3), results))"
] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
 "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v']\n",
 "Number of rows: 15372 \n",
 "Number of columns: 37\n",
 "\n",
 "Labels: \n",
 "C    8238\n",
 "L    7134\n",
 "Name: label, dtype: int64\n",
 "\n",
 "Missing values: False\n",
 "\n",
 "Duplicate Rows : 0\n"
] } ], "source": [
 "# Load dataset\n",
 "df = describe_dataset(\"./train.csv\")\n",
 "\n",
 "# Encode the class labels as integers: C -> 0, L -> 1\n",
 "df.loc[df[\"label\"] == \"C\", \"label\"] = 0\n",
 "df.loc[df[\"label\"] == \"L\", \"label\"] = 1"
] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "sc = StandardScaler()" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
 "# Load the previously fitted input scaler\n",
 "with open(\"./model/input_scaler.pkl\", \"rb\") as f:\n",
 "    sc = pickle.load(f)"
] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
 "# Standard scaling of features\n",
 "x = df.drop(\"label\", axis=1)\n",
 "x = pd.DataFrame(sc.transform(x))\n",
 "\n",
 "y = df[\"label\"].astype('int')"
] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [
 "9465     1\n",
 "8833     0\n",
 "6190     0\n",
 "7645     0\n",
 "13890    1\n",
 "        ..\n",
 "11468    1\n",
 "7221     1\n",
 "1318     1\n",
 "8915     1\n",
 "11055    1\n",
 "Name: label, Length: 12297, dtype: int64"
] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1234)\n",
 "y_train"
] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### 1.2. Train model using Scikit-learn" ] },
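{ "cell_type": "markdown", "metadata": {}, "source": [ "The source of the comparison cell is not shown in this excerpt; the cell below is a minimal sketch of how the seven classifiers could be fitted and scored on the split above to produce a table like the one that follows. The hyperparameters, the use of `round_up_metric_results`, and the sort order are assumptions, not the settings of the original run." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Sketch: fit each candidate classifier and collect per-class metrics.\n",
 "algorithms = [\n",
 "    ('LR', LogisticRegression()),\n",
 "    ('SGDC', SGDClassifier()),\n",
 "    ('SVC', SVC(probability=True)),\n",
 "    ('KNN', KNeighborsClassifier()),\n",
 "    ('DTC', DecisionTreeClassifier()),\n",
 "    ('RF', RandomForestClassifier()),\n",
 "    ('NB', GaussianNB()),\n",
 "]\n",
 "\n",
 "model_results = []\n",
 "for name, model in algorithms:\n",
 "    model.fit(X_train, y_train)\n",
 "    y_pred = model.predict(X_test)\n",
 "\n",
 "    # average=None keeps one score per class (0 = 'C', 1 = 'L')\n",
 "    model_results.append((\n",
 "        name,\n",
 "        round_up_metric_results(precision_score(y_test, y_pred, average=None, labels=[0, 1])),\n",
 "        accuracy_score(y_test, y_pred),\n",
 "        round_up_metric_results(recall_score(y_test, y_pred, average=None, labels=[0, 1])),\n",
 "        round_up_metric_results(f1_score(y_test, y_pred, average=None, labels=[0, 1])),\n",
 "        confusion_matrix(y_test, y_pred, labels=[0, 1]).tolist(),\n",
 "    ))\n",
 "\n",
 "# Sort by accuracy, best model first, and display as a DataFrame\n",
 "model_results.sort(key=lambda row: row[2], reverse=True)\n",
 "pd.DataFrame(model_results, columns=['Model', 'Precision Score', 'Accuracy score', 'Recall Score', 'F1 score', 'Confusion Matrix'])"
] },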
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [
 "<table border=\"1\" class=\"dataframe\">\n",
 "  <thead>\n",
 "    <tr><th></th><th>Model</th><th>Precision Score</th><th>Accuracy score</th><th>Recall Score</th><th>F1 score</th><th>Confusion Matrix</th></tr>\n",
 "  </thead>\n",
 "  <tbody>\n",
 "    <tr><th>0</th><td>RF</td><td>[0.999, 0.999]</td><td>0.999024</td><td>[0.999, 0.999]</td><td>[0.999, 0.999]</td><td>[[1677, 2], [1, 1395]]</td></tr>\n",
 "    <tr><th>1</th><td>KNN</td><td>[0.997, 0.999]</td><td>0.998049</td><td>[0.999, 0.996]</td><td>[0.998, 0.998]</td><td>[[1678, 1], [5, 1391]]</td></tr>\n",
 "    <tr><th>2</th><td>SVC</td><td>[0.997, 0.995]</td><td>0.996098</td><td>[0.996, 0.996]</td><td>[0.996, 0.996]</td><td>[[1672, 7], [5, 1391]]</td></tr>\n",
 "    <tr><th>3</th><td>DTC</td><td>[0.997, 0.991]</td><td>0.994146</td><td>[0.992, 0.996]</td><td>[0.995, 0.994]</td><td>[[1666, 13], [5, 1391]]</td></tr>\n",
 "    <tr><th>4</th><td>SGDC</td><td>[0.987, 0.974]</td><td>0.981463</td><td>[0.979, 0.985]</td><td>[0.983, 0.98]</td><td>[[1643, 36], [21, 1375]]</td></tr>\n",
 "    <tr><th>5</th><td>LR</td><td>[0.986, 0.975]</td><td>0.980813</td><td>[0.979, 0.983]</td><td>[0.982, 0.979]</td><td>[[1644, 35], [24, 1372]]</td></tr>\n",
 "    <tr><th>6</th><td>NB</td><td>[0.927, 0.842]</td><td>0.884878</td><td>[0.857, 0.918]</td><td>[0.89, 0.879]</td><td>[[1439, 240], [114, 1282]]</td></tr>\n",
 "  </tbody>\n",
 "</table>"
] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ] },
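{ "cell_type": "markdown", "metadata": {}, "source": [ "RF and KNN give the best accuracy on this split, while GaussianNB trails the other models. `CalibratedClassifierCV` is imported at the top but not used in this excerpt; one plausible use, sketched below, is to wrap `SGDClassifier` (which has no `predict_proba`) so that it can emit class probabilities, and then to persist the estimator next to the input scaler. The calibration settings and the file name are assumptions, not the project's actual choices." ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "# Sketch: probability calibration for SGDClassifier plus model persistence.\n",
 "# 'sketch_model.pkl' is a placeholder file name, not one used elsewhere in the project.\n",
 "calibrated_sgd = CalibratedClassifierCV(SGDClassifier(), method='sigmoid', cv=3)\n",
 "calibrated_sgd.fit(X_train, y_train)\n",
 "\n",
 "# Class probabilities for the first few test rows\n",
 "print(calibrated_sgd.predict_proba(X_test[:5]))\n",
 "\n",
 "with open('./model/sketch_model.pkl', 'wb') as f:\n",
 "    pickle.dump(calibrated_sgd, f)"
] },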