{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "objc[49355]: Class CaptureDelegate is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_videoio.3.4.16.dylib (0x108688860) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece480). One of the two will be used. Which one is undefined.\n", "objc[49355]: Class CVWindow is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440a68) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece4d0). One of the two will be used. Which one is undefined.\n", "objc[49355]: Class CVView is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440a90) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece4f8). One of the two will be used. Which one is undefined.\n", "objc[49355]: Class CVSlider is implemented in both /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/mediapipe/.dylibs/libopencv_highgui.3.4.16.dylib (0x103440ab8) and /Users/fuixlabsdev1/Programming/PP/graduation-thesis/env/lib/python3.8/site-packages/cv2/cv2.abi3.so (0x160ece520). One of the two will be used. Which one is undefined.\n" ] } ], "source": [ "import mediapipe as mp\n", "import cv2\n", "import pandas as pd\n", "import pickle\n", "\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression, SGDClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.calibration import CalibratedClassifierCV\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", "\n", "# Drawing helpers\n", "mp_drawing = mp.solutions.drawing_utils\n", "mp_pose = mp.solutions.pose" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1. Train model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1.1. Describe data and split dataset" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def rescale_frame(frame, percent=50):\n", " '''\n", " Rescale a frame to a certain percentage compare to its original frame\n", " '''\n", " width = int(frame.shape[1] * percent/ 100)\n", " height = int(frame.shape[0] * percent/ 100)\n", " dim = (width, height)\n", " return cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)\n", "\n", "\n", "def describe_dataset(dataset_path: str):\n", " '''\n", " Describe dataset\n", " '''\n", "\n", " data = pd.read_csv(dataset_path)\n", " print(f\"Headers: {list(data.columns.values)}\")\n", " print(f'Number of rows: {data.shape[0]} \\nNumber of columns: {data.shape[1]}\\n')\n", " print(f\"Labels: \\n{data['label'].value_counts()}\\n\")\n", " print(f\"Missing values: {data.isnull().values.any()}\\n\")\n", " \n", " duplicate = data[data.duplicated()]\n", " print(f\"Duplicate Rows : {len(duplicate.sum(axis=1))}\")\n", "\n", " return data\n", "\n", "\n", "def round_up_metric_results(results) -> list:\n", " '''Round up metrics results such as precision score, recall score, ...'''\n", " return list(map(lambda el: round(el, 3), results))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v']\n", "Number of rows: 15372 \n", "Number of columns: 37\n", "\n", "Labels: \n", "C 8238\n", "L 7134\n", "Name: label, dtype: int64\n", "\n", "Missing values: False\n", "\n", "Duplicate Rows : 0\n" ] } ], "source": [ "# load dataset\n", "df = describe_dataset(\"./train.csv\")\n", "\n", "# Categorizing label\n", "df.loc[df[\"label\"] == \"C\", \"label\"] = 0\n", "df.loc[df[\"label\"] == \"L\", \"label\"] = 1" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "sc = StandardScaler()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/input_scaler.pkl\", \"rb\") as f:\n", " sc = pickle.load(f)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Standard Scaling of features\n", "x = df.drop(\"label\", axis = 1)\n", "x = pd.DataFrame(sc.transform(x))\n", "\n", "y = df[\"label\"].astype('int')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "9465 1\n", "8833 0\n", "6190 0\n", "7645 0\n", "13890 1\n", " ..\n", "11468 1\n", "7221 1\n", "1318 1\n", "8915 1\n", "11055 1\n", "Name: label, Length: 12297, dtype: int64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1234)\n", "y_train" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1.2. Train model using Scikit-learn" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelPrecision ScoreAccuracy scoreRecall ScoreF1 scoreConfusion Matrix
0RF[0.999, 0.999]0.999024[0.999, 0.999][0.999, 0.999][[1677, 2], [1, 1395]]
1KNN[0.997, 0.999]0.998049[0.999, 0.996][0.998, 0.998][[1678, 1], [5, 1391]]
2SVC[0.997, 0.995]0.996098[0.996, 0.996][0.996, 0.996][[1672, 7], [5, 1391]]
3DTC[0.997, 0.991]0.994146[0.992, 0.996][0.995, 0.994][[1666, 13], [5, 1391]]
4SGDC[0.987, 0.974]0.981463[0.979, 0.985][0.983, 0.98][[1643, 36], [21, 1375]]
5LR[0.986, 0.975]0.980813[0.979, 0.983][0.982, 0.979][[1644, 35], [24, 1372]]
6NB[0.927, 0.842]0.884878[0.857, 0.918][0.89, 0.879][[1439, 240], [114, 1282]]
\n", "
" ], "text/plain": [ " Model Precision Score Accuracy score Recall Score F1 score \\\n", "0 RF [0.999, 0.999] 0.999024 [0.999, 0.999] [0.999, 0.999] \n", "1 KNN [0.997, 0.999] 0.998049 [0.999, 0.996] [0.998, 0.998] \n", "2 SVC [0.997, 0.995] 0.996098 [0.996, 0.996] [0.996, 0.996] \n", "3 DTC [0.997, 0.991] 0.994146 [0.992, 0.996] [0.995, 0.994] \n", "4 SGDC [0.987, 0.974] 0.981463 [0.979, 0.985] [0.983, 0.98] \n", "5 LR [0.986, 0.975] 0.980813 [0.979, 0.983] [0.982, 0.979] \n", "6 NB [0.927, 0.842] 0.884878 [0.857, 0.918] [0.89, 0.879] \n", "\n", " Confusion Matrix \n", "0 [[1677, 2], [1, 1395]] \n", "1 [[1678, 1], [5, 1391]] \n", "2 [[1672, 7], [5, 1391]] \n", "3 [[1666, 13], [5, 1391]] \n", "4 [[1643, 36], [21, 1375]] \n", "5 [[1644, 35], [24, 1372]] \n", "6 [[1439, 240], [114, 1282]] " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "algorithms =[(\"LR\", LogisticRegression()),\n", " (\"SVC\", SVC(probability=True)),\n", " ('KNN',KNeighborsClassifier()),\n", " (\"DTC\", DecisionTreeClassifier()),\n", " (\"SGDC\", CalibratedClassifierCV(SGDClassifier())),\n", " (\"NB\", GaussianNB()),\n", " ('RF', RandomForestClassifier()),]\n", "\n", "models = {}\n", "final_results = []\n", "\n", "for name, model in algorithms:\n", " trained_model = model.fit(X_train, y_train)\n", " models[name] = trained_model\n", "\n", " # Evaluate model\n", " model_results = model.predict(X_test)\n", "\n", " p_score = precision_score(y_test, model_results, average=None, labels=[0, 1])\n", " a_score = accuracy_score(y_test, model_results)\n", " r_score = recall_score(y_test, model_results, average=None, labels=[0, 1])\n", " f1_score_result = f1_score(y_test, model_results, average=None, labels=[0, 1])\n", " cm = confusion_matrix(y_test, model_results, labels=[0, 1])\n", " final_results.append(( name, round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))\n", "\n", "# Sort results by F1 score\n", "final_results.sort(key=lambda k: sum(k[4]), reverse=True)\n", "pd.DataFrame(final_results, columns=[\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 1.3. Dump models pickle" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/all_sklearn.pkl\", \"wb\") as f:\n", " pickle.dump(models, f)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/input_scaler.pkl\", \"wb\") as f:\n", " pickle.dump(sc, f)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.8.13 (conda)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "9260f401923fb5c4108c543a7d176de9733d378b3752e49535ad7c43c2271b65" } } }, "nbformat": 4, "nbformat_minor": 2 }