{
"cells": [
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [],
"source": [
"import mediapipe as mp\n",
"import cv2\n",
"import pandas as pd\n",
"import pickle\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.calibration import CalibratedClassifierCV\n",
"from sklearn.linear_model import LogisticRegression, SGDClassifier\n",
"from sklearn.svm import SVC\n",
"from sklearn.neighbors import KNeighborsClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.naive_bayes import GaussianNB\n",
"\n",
"from sklearn.metrics import precision_score, accuracy_score, f1_score, recall_score, confusion_matrix\n",
"\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"\n",
"# Drawing helpers\n",
"mp_drawing = mp.solutions.drawing_utils\n",
"mp_pose = mp.solutions.pose"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Train Model"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1.1. Describe data and split dataset"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
"def rescale_frame(frame, percent=50):\n",
" '''\n",
" Rescale a frame to a certain percentage compare to its original frame\n",
" '''\n",
" width = int(frame.shape[1] * percent/ 100)\n",
" height = int(frame.shape[0] * percent/ 100)\n",
" dim = (width, height)\n",
" return cv2.resize(frame, dim, interpolation = cv2.INTER_AREA)\n",
"\n",
"\n",
"def describe_dataset(dataset_path: str):\n",
" '''\n",
" Describe dataset\n",
" '''\n",
"\n",
" data = pd.read_csv(dataset_path)\n",
" print(f\"Headers: {list(data.columns.values)}\")\n",
" print(f'Number of rows: {data.shape[0]} \\nNumber of columns: {data.shape[1]}\\n')\n",
" print(f\"Labels: \\n{data['label'].value_counts()}\\n\")\n",
" print(f\"Missing values: {data.isnull().values.any()}\\n\")\n",
" \n",
" duplicate = data[data.duplicated()]\n",
" print(f\"Duplicate Rows : {len(duplicate.sum(axis=1))}\")\n",
"\n",
" return data\n",
"\n",
"\n",
"def round_up_metric_results(results) -> list:\n",
" '''Round up metrics results such as precision score, recall score, ...'''\n",
" return list(map(lambda el: round(el, 3), results))"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']\n",
"Number of rows: 28520 \n",
"Number of columns: 69\n",
"\n",
"Labels: \n",
"C 9904\n",
"L 9546\n",
"H 9070\n",
"Name: label, dtype: int64\n",
"\n",
"Missing values: False\n",
"\n",
"Duplicate Rows : 0\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" label | \n",
" nose_x | \n",
" nose_y | \n",
" nose_z | \n",
" nose_v | \n",
" left_shoulder_x | \n",
" left_shoulder_y | \n",
" left_shoulder_z | \n",
" left_shoulder_v | \n",
" right_shoulder_x | \n",
" ... | \n",
" right_heel_z | \n",
" right_heel_v | \n",
" left_foot_index_x | \n",
" left_foot_index_y | \n",
" left_foot_index_z | \n",
" left_foot_index_v | \n",
" right_foot_index_x | \n",
" right_foot_index_y | \n",
" right_foot_index_z | \n",
" right_foot_index_v | \n",
"
\n",
" \n",
" \n",
" \n",
" 28517 | \n",
" 1 | \n",
" 0.735630 | \n",
" 0.543294 | \n",
" 0.007467 | \n",
" 0.999246 | \n",
" 0.695831 | \n",
" 0.417349 | \n",
" 0.155194 | \n",
" 0.995723 | \n",
" 0.720067 | \n",
" ... | \n",
" 0.086010 | \n",
" 0.966131 | \n",
" 0.226601 | \n",
" 0.598075 | \n",
" 0.219305 | \n",
" 0.470830 | \n",
" 0.220079 | \n",
" 0.614120 | \n",
" 0.026265 | \n",
" 0.934942 | \n",
"
\n",
" \n",
" 28518 | \n",
" 1 | \n",
" 0.775572 | \n",
" 0.517579 | \n",
" 0.012821 | \n",
" 0.999378 | \n",
" 0.704168 | \n",
" 0.404210 | \n",
" 0.162908 | \n",
" 0.995909 | \n",
" 0.730823 | \n",
" ... | \n",
" 0.070911 | \n",
" 0.967070 | \n",
" 0.238810 | \n",
" 0.610591 | \n",
" 0.198591 | \n",
" 0.496140 | \n",
" 0.228907 | \n",
" 0.625559 | \n",
" 0.018591 | \n",
" 0.938905 | \n",
"
\n",
" \n",
" 28519 | \n",
" 1 | \n",
" 0.790600 | \n",
" 0.498958 | \n",
" 0.007789 | \n",
" 0.999467 | \n",
" 0.710651 | \n",
" 0.394019 | \n",
" 0.164441 | \n",
" 0.996123 | \n",
" 0.736771 | \n",
" ... | \n",
" 0.085872 | \n",
" 0.967943 | \n",
" 0.238197 | \n",
" 0.609329 | \n",
" 0.233198 | \n",
" 0.510583 | \n",
" 0.227823 | \n",
" 0.626068 | \n",
" 0.036127 | \n",
" 0.940917 | \n",
"
\n",
" \n",
"
\n",
"
3 rows × 69 columns
\n",
"
"
],
"text/plain": [
" label nose_x nose_y nose_z nose_v left_shoulder_x \\\n",
"28517 1 0.735630 0.543294 0.007467 0.999246 0.695831 \n",
"28518 1 0.775572 0.517579 0.012821 0.999378 0.704168 \n",
"28519 1 0.790600 0.498958 0.007789 0.999467 0.710651 \n",
"\n",
" left_shoulder_y left_shoulder_z left_shoulder_v right_shoulder_x \\\n",
"28517 0.417349 0.155194 0.995723 0.720067 \n",
"28518 0.404210 0.162908 0.995909 0.730823 \n",
"28519 0.394019 0.164441 0.996123 0.736771 \n",
"\n",
" ... right_heel_z right_heel_v left_foot_index_x left_foot_index_y \\\n",
"28517 ... 0.086010 0.966131 0.226601 0.598075 \n",
"28518 ... 0.070911 0.967070 0.238810 0.610591 \n",
"28519 ... 0.085872 0.967943 0.238197 0.609329 \n",
"\n",
" left_foot_index_z left_foot_index_v right_foot_index_x \\\n",
"28517 0.219305 0.470830 0.220079 \n",
"28518 0.198591 0.496140 0.228907 \n",
"28519 0.233198 0.510583 0.227823 \n",
"\n",
" right_foot_index_y right_foot_index_z right_foot_index_v \n",
"28517 0.614120 0.026265 0.934942 \n",
"28518 0.625559 0.018591 0.938905 \n",
"28519 0.626068 0.036127 0.940917 \n",
"\n",
"[3 rows x 69 columns]"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = describe_dataset(\"./train.csv\")\n",
"df.loc[df[\"label\"] == \"C\", \"label\"] = 0\n",
"df.loc[df[\"label\"] == \"H\", \"label\"] = 1\n",
"df.loc[df[\"label\"] == \"L\", \"label\"] = 2\n",
"df.tail(3)"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"# Extract features and class\n",
"X = df.drop(\"label\", axis=1)\n",
"y = df[\"label\"].astype(\"int\")"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [],
"source": [
"sc = StandardScaler()\n",
"X = pd.DataFrame(sc.fit_transform(X))"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1469 0\n",
"292 0\n",
"1568 0\n",
"Name: label, dtype: int64"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1234)\n",
"y_test.head(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1.2. Train model using Scikit-Learn and train set evaluation"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"algorithms =[(\"LR\", LogisticRegression()),\n",
" (\"SVC\", SVC(probability=True)),\n",
" ('KNN',KNeighborsClassifier()),\n",
" (\"DTC\", DecisionTreeClassifier()),\n",
" (\"SGDC\", CalibratedClassifierCV(SGDClassifier())),\n",
" (\"NB\", GaussianNB()),\n",
" ('RF', RandomForestClassifier()),]\n",
"\n",
"models = {}\n",
"final_results = []\n",
"\n",
"for name, model in algorithms:\n",
" trained_model = model.fit(X_train, y_train)\n",
" models[name] = trained_model\n",
"\n",
" # Evaluate model\n",
" model_results = model.predict(X_test)\n",
"\n",
" p_score = precision_score(y_test, model_results, average=None, labels=[0, 1, 2])\n",
" a_score = accuracy_score(y_test, model_results)\n",
" r_score = recall_score(y_test, model_results, average=None, labels=[0, 1, 2])\n",
" f1_score_result = f1_score(y_test, model_results, average=None, labels=[0, 1, 2])\n",
" cm = confusion_matrix(y_test, model_results, labels=[0, 1, 2])\n",
" final_results.append(( name, round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm))\n"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Model | \n",
" Precision Score | \n",
" Accuracy score | \n",
" Recall Score | \n",
" F1 score | \n",
" Confusion Matrix | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" KNN | \n",
" [0.999, 1.0, 1.0] | \n",
" 0.999825 | \n",
" [1.0, 1.0, 0.999] | \n",
" [1.0, 1.0, 1.0] | \n",
" [[1915, 0, 0], [0, 1844, 0], [1, 0, 1944]] | \n",
"
\n",
" \n",
" 1 | \n",
" LR | \n",
" [0.999, 1.0, 0.999] | \n",
" 0.999649 | \n",
" [0.999, 1.0, 0.999] | \n",
" [0.999, 1.0, 0.999] | \n",
" [[1914, 0, 1], [0, 1844, 0], [1, 0, 1944]] | \n",
"
\n",
" \n",
" 2 | \n",
" SVC | \n",
" [0.998, 1.0, 0.999] | \n",
" 0.999299 | \n",
" [0.999, 1.0, 0.998] | \n",
" [0.999, 1.0, 0.999] | \n",
" [[1914, 0, 1], [0, 1844, 0], [3, 0, 1942]] | \n",
"
\n",
" \n",
" 3 | \n",
" RF | \n",
" [0.998, 1.0, 1.0] | \n",
" 0.999474 | \n",
" [1.0, 0.999, 0.999] | \n",
" [0.999, 1.0, 0.999] | \n",
" [[1915, 0, 0], [1, 1843, 0], [2, 0, 1943]] | \n",
"
\n",
" \n",
" 4 | \n",
" SGDC | \n",
" [0.999, 0.998, 0.999] | \n",
" 0.998597 | \n",
" [0.997, 1.0, 0.999] | \n",
" [0.998, 0.999, 0.999] | \n",
" [[1909, 4, 2], [0, 1844, 0], [2, 0, 1943]] | \n",
"
\n",
" \n",
" 5 | \n",
" DTC | \n",
" [0.994, 1.0, 0.999] | \n",
" 0.997721 | \n",
" [0.999, 0.998, 0.995] | \n",
" [0.997, 0.999, 0.997] | \n",
" [[1914, 0, 1], [3, 1841, 0], [9, 0, 1936]] | \n",
"
\n",
" \n",
" 6 | \n",
" NB | \n",
" [0.816, 0.931, 0.941] | \n",
" 0.892532 | \n",
" [0.883, 0.951, 0.847] | \n",
" [0.848, 0.941, 0.892] | \n",
" [[1690, 122, 103], [91, 1753, 0], [290, 7, 1648]] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Model Precision Score Accuracy score Recall Score \\\n",
"0 KNN [0.999, 1.0, 1.0] 0.999825 [1.0, 1.0, 0.999] \n",
"1 LR [0.999, 1.0, 0.999] 0.999649 [0.999, 1.0, 0.999] \n",
"2 SVC [0.998, 1.0, 0.999] 0.999299 [0.999, 1.0, 0.998] \n",
"3 RF [0.998, 1.0, 1.0] 0.999474 [1.0, 0.999, 0.999] \n",
"4 SGDC [0.999, 0.998, 0.999] 0.998597 [0.997, 1.0, 0.999] \n",
"5 DTC [0.994, 1.0, 0.999] 0.997721 [0.999, 0.998, 0.995] \n",
"6 NB [0.816, 0.931, 0.941] 0.892532 [0.883, 0.951, 0.847] \n",
"\n",
" F1 score Confusion Matrix \n",
"0 [1.0, 1.0, 1.0] [[1915, 0, 0], [0, 1844, 0], [1, 0, 1944]] \n",
"1 [0.999, 1.0, 0.999] [[1914, 0, 1], [0, 1844, 0], [1, 0, 1944]] \n",
"2 [0.999, 1.0, 0.999] [[1914, 0, 1], [0, 1844, 0], [3, 0, 1942]] \n",
"3 [0.999, 1.0, 0.999] [[1915, 0, 0], [1, 1843, 0], [2, 0, 1943]] \n",
"4 [0.998, 0.999, 0.999] [[1909, 4, 2], [0, 1844, 0], [2, 0, 1943]] \n",
"5 [0.997, 0.999, 0.997] [[1914, 0, 1], [3, 1841, 0], [9, 0, 1936]] \n",
"6 [0.848, 0.941, 0.892] [[1690, 122, 103], [91, 1753, 0], [290, 7, 1648]] "
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sort results by F1 score\n",
"final_results.sort(key=lambda k: sum(k[4]), reverse=True)\n",
"\n",
"pd.DataFrame(final_results, columns=[\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1.3. Test set evaluation"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v', 'left_knee_x', 'left_knee_y', 'left_knee_z', 'left_knee_v', 'right_knee_x', 'right_knee_y', 'right_knee_z', 'right_knee_v', 'left_ankle_x', 'left_ankle_y', 'left_ankle_z', 'left_ankle_v', 'right_ankle_x', 'right_ankle_y', 'right_ankle_z', 'right_ankle_v', 'left_heel_x', 'left_heel_y', 'left_heel_z', 'left_heel_v', 'right_heel_x', 'right_heel_y', 'right_heel_z', 'right_heel_v', 'left_foot_index_x', 'left_foot_index_y', 'left_foot_index_z', 'left_foot_index_v', 'right_foot_index_x', 'right_foot_index_y', 'right_foot_index_z', 'right_foot_index_v']\n",
"Number of rows: 710 \n",
"Number of columns: 69\n",
"\n",
"Labels: \n",
"H 241\n",
"L 235\n",
"C 234\n",
"Name: label, dtype: int64\n",
"\n",
"Missing values: False\n",
"\n",
"Duplicate Rows : 0\n"
]
}
],
"source": [
"test_df = describe_dataset(\"./test.csv\")\n",
"test_df = test_df.sample(frac=1).reset_index(drop=True)\n",
"\n",
"test_df.loc[test_df[\"label\"] == \"C\", \"label\"] = 0\n",
"test_df.loc[test_df[\"label\"] == \"H\", \"label\"] = 1\n",
"test_df.loc[test_df[\"label\"] == \"L\", \"label\"] = 2\n",
"\n",
"test_x = test_df.drop(\"label\", axis=1)\n",
"test_y = test_df[\"label\"].astype(\"int\")\n",
"\n",
"test_x = pd.DataFrame(sc.transform(test_x))"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Model | \n",
" Precision Score | \n",
" Accuracy score | \n",
" Recall Score | \n",
" F1 score | \n",
" Confusion Matrix | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" LR | \n",
" [0.987, 1.0, 1.0] | \n",
" 0.995775 | \n",
" [1.0, 0.996, 0.991] | \n",
" [0.994, 0.998, 0.996] | \n",
" [[234, 0, 0], [1, 240, 0], [2, 0, 233]] | \n",
"
\n",
" \n",
" 1 | \n",
" SVC | \n",
" [0.963, 1.0, 1.0] | \n",
" 0.987324 | \n",
" [1.0, 0.992, 0.97] | \n",
" [0.981, 0.996, 0.985] | \n",
" [[234, 0, 0], [2, 239, 0], [7, 0, 228]] | \n",
"
\n",
" \n",
" 2 | \n",
" SGDC | \n",
" [0.974, 0.975, 0.996] | \n",
" 0.981690 | \n",
" [0.974, 0.983, 0.987] | \n",
" [0.974, 0.979, 0.991] | \n",
" [[228, 6, 0], [3, 237, 1], [3, 0, 232]] | \n",
"
\n",
" \n",
" 3 | \n",
" KNN | \n",
" [0.869, 0.996, 1.0] | \n",
" 0.949296 | \n",
" [0.996, 0.992, 0.86] | \n",
" [0.928, 0.994, 0.924] | \n",
" [[233, 1, 0], [2, 239, 0], [33, 0, 202]] | \n",
"
\n",
" \n",
" 4 | \n",
" RF | \n",
" [0.765, 1.0, 1.0] | \n",
" 0.898592 | \n",
" [1.0, 1.0, 0.694] | \n",
" [0.867, 1.0, 0.819] | \n",
" [[234, 0, 0], [0, 241, 0], [72, 0, 163]] | \n",
"
\n",
" \n",
" 5 | \n",
" NB | \n",
" [0.892, 0.737, 0.945] | \n",
" 0.842254 | \n",
" [0.632, 0.942, 0.949] | \n",
" [0.74, 0.827, 0.947] | \n",
" [[148, 73, 13], [14, 227, 0], [4, 8, 223]] | \n",
"
\n",
" \n",
" 6 | \n",
" DTC | \n",
" [0.69, 1.0, 0.625] | \n",
" 0.767606 | \n",
" [0.543, 0.988, 0.766] | \n",
" [0.608, 0.994, 0.688] | \n",
" [[127, 0, 107], [2, 238, 1], [55, 0, 180]] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Model Precision Score Accuracy score Recall Score \\\n",
"0 LR [0.987, 1.0, 1.0] 0.995775 [1.0, 0.996, 0.991] \n",
"1 SVC [0.963, 1.0, 1.0] 0.987324 [1.0, 0.992, 0.97] \n",
"2 SGDC [0.974, 0.975, 0.996] 0.981690 [0.974, 0.983, 0.987] \n",
"3 KNN [0.869, 0.996, 1.0] 0.949296 [0.996, 0.992, 0.86] \n",
"4 RF [0.765, 1.0, 1.0] 0.898592 [1.0, 1.0, 0.694] \n",
"5 NB [0.892, 0.737, 0.945] 0.842254 [0.632, 0.942, 0.949] \n",
"6 DTC [0.69, 1.0, 0.625] 0.767606 [0.543, 0.988, 0.766] \n",
"\n",
" F1 score Confusion Matrix \n",
"0 [0.994, 0.998, 0.996] [[234, 0, 0], [1, 240, 0], [2, 0, 233]] \n",
"1 [0.981, 0.996, 0.985] [[234, 0, 0], [2, 239, 0], [7, 0, 228]] \n",
"2 [0.974, 0.979, 0.991] [[228, 6, 0], [3, 237, 1], [3, 0, 232]] \n",
"3 [0.928, 0.994, 0.924] [[233, 1, 0], [2, 239, 0], [33, 0, 202]] \n",
"4 [0.867, 1.0, 0.819] [[234, 0, 0], [0, 241, 0], [72, 0, 163]] \n",
"5 [0.74, 0.827, 0.947] [[148, 73, 13], [14, 227, 0], [4, 8, 223]] \n",
"6 [0.608, 0.994, 0.688] [[127, 0, 107], [2, 238, 1], [55, 0, 180]] "
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"testset_final_results = []\n",
"\n",
"for name, model in models.items():\n",
" # Evaluate model\n",
" model_results = model.predict(test_x)\n",
"\n",
" p_score = precision_score(test_y, model_results, average=None, labels=[0, 1, 2])\n",
" a_score = accuracy_score(test_y, model_results)\n",
" r_score = recall_score(test_y, model_results, average=None, labels=[0, 1, 2])\n",
" f1_score_result = f1_score(test_y, model_results, average=None, labels=[0, 1, 2])\n",
" cm = confusion_matrix(test_y, model_results, labels=[0, 1, 2])\n",
" testset_final_results.append(( name, round_up_metric_results(p_score), a_score, round_up_metric_results(r_score), round_up_metric_results(f1_score_result), cm ))\n",
"\n",
"\n",
"testset_final_results.sort(key=lambda k: sum(k[4]), reverse=True)\n",
"pd.DataFrame(testset_final_results, columns=[\"Model\", \"Precision Score\", \"Accuracy score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1.4. Dumped model and input scaler using pickle\n",
"\n",
"According to the evaluations, there are multiple good models at the moment, therefore, the best models are LR and Ridge."
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"with open(\"./model/all_sklearn.pkl\", \"wb\") as f:\n",
" pickle.dump(models, f)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"with open(\"./model/LR_model.pkl\", \"wb\") as f:\n",
" pickle.dump(models[\"LR\"], f)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"with open(\"./model/SVC_model.pkl\", \"wb\") as f:\n",
" pickle.dump(models[\"SVC\"], f)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Dump input scaler\n",
"with open(\"./model/input_scaler.pkl\", \"wb\") as f:\n",
" pickle.dump(sc, f)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.13 (conda)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "9260f401923fb5c4108c543a7d176de9733d378b3752e49535ad7c43c2271b65"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}