{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Data visualization\n", "import numpy as np\n", "import pandas as pd \n", "\n", "# Keras\n", "from keras.models import Sequential\n", "from keras.layers import Dense\n", "from keras.layers import Dropout\n", "from keras.optimizers import Adam\n", "from keras.utils.np_utils import to_categorical\n", "from keras.callbacks import EarlyStopping, TensorBoard\n", "import keras_tuner as kt\n", "\n", "# Train-Test\n", "from sklearn.model_selection import train_test_split\n", "# Classification Report\n", "from sklearn.metrics import confusion_matrix, precision_recall_fscore_support\n", "\n", "import pickle\n", "\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Important Landmarks and Important functions" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Determine important landmarks for plank\n", "IMPORTANT_LMS = [\n", " \"NOSE\",\n", " \"LEFT_SHOULDER\",\n", " \"RIGHT_SHOULDER\",\n", " \"RIGHT_ELBOW\",\n", " \"LEFT_ELBOW\",\n", " \"RIGHT_WRIST\",\n", " \"LEFT_WRIST\",\n", " \"LEFT_HIP\",\n", " \"RIGHT_HIP\",\n", "]\n", "\n", "# Generate all columns of the data frame\n", "\n", "HEADERS = [\"label\"] # Label column\n", "\n", "for lm in IMPORTANT_LMS:\n", " HEADERS += [f\"{lm.lower()}_x\", f\"{lm.lower()}_y\", f\"{lm.lower()}_z\", f\"{lm.lower()}_v\"]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def describe_dataset(dataset_path: str):\n", " '''\n", " Describe dataset\n", " '''\n", "\n", " data = pd.read_csv(dataset_path)\n", " print(f\"Headers: {list(data.columns.values)}\")\n", " print(f'Number of rows: {data.shape[0]} \\nNumber of columns: {data.shape[1]}\\n')\n", " print(f\"Labels: \\n{data['label'].value_counts()}\\n\")\n", " print(f\"Missing values: {data.isnull().values.any()}\\n\")\n", " \n", " duplicate = data[data.duplicated()]\n", " print(f\"Duplicate Rows : {len(duplicate.sum(axis=1))}\")\n", "\n", " return data\n", "\n", "\n", "# Remove duplicate rows (optional)\n", "def remove_duplicate_rows(dataset_path: str):\n", " '''\n", " Remove duplicated data from the dataset then save it to another files\n", " '''\n", " \n", " df = pd.read_csv(dataset_path)\n", " df.drop_duplicates(keep=\"first\", inplace=True)\n", " df.to_csv(f\"cleaned_train.csv\", sep=',', encoding='utf-8', index=False)\n", "\n", "\n", "def round_up_metric_results(results) -> list:\n", " '''Round up metrics results such as precision score, recall score, ...'''\n", " return list(map(lambda el: round(el, 3), results))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Describe Dataset & Split Data" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v']\n", "Number of rows: 15372 \n", "Number of columns: 37\n", "\n", "Labels: \n", "C 8238\n", "L 7134\n", "Name: label, dtype: int64\n", "\n", "Missing values: False\n", "\n", "Duplicate Rows : 0\n" ] } ], "source": [ "# load dataset\n", "df = describe_dataset(\"./train.csv\")\n", "\n", "# Categorizing label\n", "df.loc[df[\"label\"] == \"C\", \"label\"] = 0\n", "df.loc[df[\"label\"] == \"L\", \"label\"] = 1" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "with open(\"./model/input_scaler.pkl\", \"rb\") as f:\n", " sc = pickle.load(f)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Standard Scaling of features\n", "x = df.drop(\"label\", axis = 1)\n", "x = pd.DataFrame(sc.transform(x))\n", "\n", "y = df[\"label\"]\n", "\n", "# # Converting prediction to categorical\n", "y_cat = to_categorical(y)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "x_train, x_test, y_train, y_test = train_test_split(x.values, y_cat, test_size=0.2, random_state=1234)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. Build Model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.1. Set up" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "stop_early = EarlyStopping(monitor='val_loss', patience=3)\n", "\n", "# Final Results\n", "final_models = {}" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def describe_model(model):\n", " '''\n", " Describe Model architecture\n", " '''\n", " print(f\"Describe models architecture\")\n", " for i, layer in enumerate(model.layers):\n", " number_of_units = layer.units if hasattr(layer, 'units') else 0\n", "\n", " if hasattr(layer, \"activation\"):\n", " print(f\"Layer-{i + 1}: {number_of_units} units, func: \", layer.activation)\n", " else:\n", " print(f\"Layer-{i + 1}: {number_of_units} units, func: None\")\n", "\n", "\n", "def get_best_model(tuner):\n", " '''\n", " Describe and return the best model found from keras tuner\n", " '''\n", " best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]\n", " best_model = tuner.hypermodel.build(best_hps)\n", "\n", " describe_model(best_model)\n", "\n", " print(\"\\nOther params:\")\n", " ignore_params = [\"tuner\", \"activation\", \"layer\", \"epoch\"]\n", " for param, value in best_hps.values.items():\n", " if not any(word in param for word in ignore_params):\n", " print(f\"{param}: {value}\")\n", "\n", " return best_model" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.2. Model with 3 layers" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "def model_3l_builder(hp):\n", " model = Sequential()\n", " model.add(Dense(36, input_dim = 36, activation = \"relu\"))\n", "\n", " hp_activation = hp.Choice('activation', values=['relu', 'tanh'])\n", " hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)\n", " hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])\n", "\n", " model.add(Dense(units=hp_layer_1, activation=hp_activation))\n", " model.add(Dense(2, activation = \"softmax\"))\n", "\n", " model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss=\"categorical_crossentropy\", metrics = [\"accuracy\"])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Trial 30 Complete [00h 00m 46s]\n", "val_accuracy: 0.9980487823486328\n", "\n", "Best val_accuracy So Far: 0.9980487823486328\n", "Total elapsed time: 00h 08m 25s\n", "INFO:tensorflow:Oracle triggered exit\n" ] } ], "source": [ "tuner_3l = kt.Hyperband(\n", " model_3l_builder,\n", " objective='val_accuracy',\n", " max_epochs=10,\n", " directory='keras_tuner_dir',\n", " project_name='keras_tuner_demo',\n", ")\n", "tuner_3l.search(x_train, y_train, validation_data=(x_test, y_test), epochs=10, callbacks=[stop_early])" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 448 units, func: \n", "Layer-3: 2 units, func: \n", "\n", "Other params:\n", "learning_rate: 0.001\n", "Epoch 1/100\n", " 5/1230 [..............................] - ETA: 19s - loss: 0.6247 - accuracy: 0.6600 " ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 09:54:28.588878: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - ETA: 0s - loss: 0.0504 - accuracy: 0.9848" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 09:54:39.929268: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - 14s 11ms/step - loss: 0.0504 - accuracy: 0.9848 - val_loss: 0.0889 - val_accuracy: 0.9717\n", "Epoch 2/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0241 - accuracy: 0.9940 - val_loss: 0.0188 - val_accuracy: 0.9948\n", "Epoch 3/100\n", "1230/1230 [==============================] - 13s 10ms/step - loss: 0.0187 - accuracy: 0.9946 - val_loss: 0.0127 - val_accuracy: 0.9964\n", "Epoch 4/100\n", "1230/1230 [==============================] - 13s 10ms/step - loss: 0.0179 - accuracy: 0.9950 - val_loss: 0.0140 - val_accuracy: 0.9958\n", "Epoch 5/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0145 - accuracy: 0.9958 - val_loss: 0.0211 - val_accuracy: 0.9958\n", "Epoch 6/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0143 - accuracy: 0.9958 - val_loss: 0.0093 - val_accuracy: 0.9984\n", "Epoch 7/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0118 - accuracy: 0.9966 - val_loss: 0.0077 - val_accuracy: 0.9984\n", "Epoch 8/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0120 - accuracy: 0.9961 - val_loss: 0.0112 - val_accuracy: 0.9977\n", "Epoch 9/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0121 - accuracy: 0.9959 - val_loss: 0.0073 - val_accuracy: 0.9984\n", "Epoch 10/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0110 - accuracy: 0.9963 - val_loss: 0.0108 - val_accuracy: 0.9971\n", "Epoch 11/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0097 - accuracy: 0.9970 - val_loss: 0.0110 - val_accuracy: 0.9971\n", "Epoch 12/100\n", "1230/1230 [==============================] - 13s 11ms/step - loss: 0.0098 - accuracy: 0.9972 - val_loss: 0.0107 - val_accuracy: 0.9967\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_3l = get_best_model(tuner_3l)\n", "model_3l.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "final_models[\"3_layers\"] = model_3l" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.3. Model with 5 layers" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "def model_5l_builder(hp):\n", " model = Sequential()\n", " model.add(Dense(36, input_dim = 36, activation = \"relu\"))\n", "\n", " hp_activation = hp.Choice('activation', values=['relu', 'tanh'])\n", " hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)\n", " hp_layer_2 = hp.Int('layer_2', min_value=32, max_value=512, step=32)\n", " hp_layer_3 = hp.Int('layer_3', min_value=32, max_value=512, step=32)\n", " hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])\n", "\n", " model.add(Dense(units=hp_layer_1, activation=hp_activation))\n", " model.add(Dense(units=hp_layer_2, activation=hp_activation))\n", " model.add(Dense(units=hp_layer_3, activation=hp_activation))\n", " model.add(Dense(2, activation = \"softmax\"))\n", "\n", " model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss=\"categorical_crossentropy\", metrics = [\"accuracy\"])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Trial 30 Complete [00h 00m 54s]\n", "val_accuracy: 0.9973983764648438\n", "\n", "Best val_accuracy So Far: 0.9986991882324219\n", "Total elapsed time: 00h 11m 12s\n", "INFO:tensorflow:Oracle triggered exit\n" ] } ], "source": [ "tuner_5l = kt.Hyperband(\n", " model_5l_builder,\n", " objective='val_accuracy',\n", " max_epochs=10,\n", " directory='keras_tuner_dir',\n", " project_name='keras_tuner_demo_2'\n", ")\n", "tuner_5l.search(x_train, y_train, validation_data=(x_test, y_test), epochs=10, callbacks=[stop_early, TensorBoard(\"./keras_tuner_dir/logs\")])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 160 units, func: \n", "Layer-3: 352 units, func: \n", "Layer-4: 64 units, func: \n", "Layer-5: 2 units, func: \n", "\n", "Other params:\n", "learning_rate: 0.001\n", "Epoch 1/100\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 10:15:07.538823: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - ETA: 0s - loss: 0.0494 - accuracy: 0.9848" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 10:15:21.397335: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0494 - accuracy: 0.9848 - val_loss: 0.0152 - val_accuracy: 0.9958\n", "Epoch 2/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0238 - accuracy: 0.9932 - val_loss: 0.0145 - val_accuracy: 0.9954\n", "Epoch 3/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0193 - accuracy: 0.9947 - val_loss: 0.0146 - val_accuracy: 0.9971\n", "Epoch 4/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0169 - accuracy: 0.9950 - val_loss: 0.0140 - val_accuracy: 0.9964\n", "Epoch 5/100\n", "1230/1230 [==============================] - 15s 13ms/step - loss: 0.0160 - accuracy: 0.9960 - val_loss: 0.0154 - val_accuracy: 0.9964\n", "Epoch 6/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0135 - accuracy: 0.9963 - val_loss: 0.0126 - val_accuracy: 0.9961\n", "Epoch 7/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0126 - accuracy: 0.9960 - val_loss: 0.0098 - val_accuracy: 0.9971\n", "Epoch 8/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0106 - accuracy: 0.9966 - val_loss: 0.0090 - val_accuracy: 0.9971\n", "Epoch 9/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0110 - accuracy: 0.9966 - val_loss: 0.0146 - val_accuracy: 0.9974\n", "Epoch 10/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0098 - accuracy: 0.9963 - val_loss: 0.0257 - val_accuracy: 0.9922\n", "Epoch 11/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0106 - accuracy: 0.9968 - val_loss: 0.0138 - val_accuracy: 0.9961\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_5l = get_best_model(tuner_5l)\n", "model_5l.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "final_models[\"5_layers\"] = model_5l" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.4. Model with 7 layers include Dropout" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "def model_7lD_builder(hp):\n", " model = Sequential()\n", " model.add(Dense(36, input_dim = 36, activation = \"relu\"))\n", "\n", " hp_activation = hp.Choice('activation', values=['relu', 'tanh'])\n", " hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)\n", " hp_layer_2 = hp.Int('layer_2', min_value=32, max_value=512, step=32)\n", " hp_layer_3 = hp.Int('layer_3', min_value=32, max_value=512, step=32)\n", " hp_dropout_1 = hp.Float('dropout_1', min_value=0.1, max_value=0.5, step=0.1)\n", " hp_dropout_2 = hp.Float('dropout_2', min_value=0.1, max_value=0.5, step=0.1)\n", " hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])\n", "\n", " model.add(Dense(units=hp_layer_1, activation=hp_activation))\n", " model.add(Dropout(rate=hp_dropout_1))\n", " model.add(Dense(units=hp_layer_2, activation=hp_activation))\n", " model.add(Dropout(rate=hp_dropout_2))\n", " model.add(Dense(units=hp_layer_3, activation=hp_activation))\n", " model.add(Dense(2, activation = \"softmax\"))\n", "\n", " model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss=\"categorical_crossentropy\", metrics = [\"accuracy\"])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Trial 30 Complete [00h 01m 04s]\n", "accuracy: 0.9945515394210815\n", "\n", "Best accuracy So Far: 0.9969098567962646\n", "Total elapsed time: 00h 12m 19s\n", "INFO:tensorflow:Oracle triggered exit\n" ] } ], "source": [ "tuner_7lD = kt.Hyperband(\n", " model_7lD_builder,\n", " objective='accuracy',\n", " max_epochs=10,\n", " directory='keras_tuner_dir',\n", " project_name='keras_tuner_demo_3'\n", ")\n", "tuner_7lD.search(x_train, y_train, validation_data=(x_test, y_test), epochs=10, callbacks=[stop_early, TensorBoard(\"./keras_tuner_dir/logs\")])" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 320 units, func: \n", "Layer-3: 0 units, func: None\n", "Layer-4: 96 units, func: \n", "Layer-5: 0 units, func: None\n", "Layer-6: 448 units, func: \n", "Layer-7: 2 units, func: \n", "\n", "Other params:\n", "dropout_1: 0.30000000000000004\n", "dropout_2: 0.30000000000000004\n", "learning_rate: 0.001\n", "Epoch 1/100\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 10:37:14.947724: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - ETA: 0s - loss: 0.0592 - accuracy: 0.9811" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 10:37:31.869492: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - 20s 16ms/step - loss: 0.0592 - accuracy: 0.9811 - val_loss: 0.0177 - val_accuracy: 0.9961\n", "Epoch 2/100\n", "1230/1230 [==============================] - 19s 16ms/step - loss: 0.0235 - accuracy: 0.9934 - val_loss: 0.0164 - val_accuracy: 0.9951\n", "Epoch 3/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0206 - accuracy: 0.9945 - val_loss: 0.0150 - val_accuracy: 0.9945\n", "Epoch 4/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0175 - accuracy: 0.9951 - val_loss: 0.0160 - val_accuracy: 0.9961\n", "Epoch 5/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0176 - accuracy: 0.9950 - val_loss: 0.0131 - val_accuracy: 0.9964\n", "Epoch 6/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0137 - accuracy: 0.9960 - val_loss: 0.0091 - val_accuracy: 0.9984\n", "Epoch 7/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0141 - accuracy: 0.9959 - val_loss: 0.0121 - val_accuracy: 0.9958\n", "Epoch 8/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0118 - accuracy: 0.9964 - val_loss: 0.0089 - val_accuracy: 0.9967\n", "Epoch 9/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0097 - accuracy: 0.9969 - val_loss: 0.0155 - val_accuracy: 0.9974\n", "Epoch 10/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0154 - accuracy: 0.9964 - val_loss: 0.0093 - val_accuracy: 0.9974\n", "Epoch 11/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0118 - accuracy: 0.9970 - val_loss: 0.0073 - val_accuracy: 0.9987\n", "Epoch 12/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0103 - accuracy: 0.9971 - val_loss: 0.0185 - val_accuracy: 0.9980\n", "Epoch 13/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0112 - accuracy: 0.9971 - val_loss: 0.0105 - val_accuracy: 0.9977\n", "Epoch 14/100\n", "1230/1230 [==============================] - 19s 15ms/step - loss: 0.0132 - accuracy: 0.9965 - val_loss: 0.0183 - val_accuracy: 0.9964\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_7lD = get_best_model(tuner_7lD)\n", "model_7lD.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "final_models[\"7_layers_with_dropout\"] = model_7lD" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.5. Model with 7 layers" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "def model_7l_builder(hp):\n", " model = Sequential()\n", " model.add(Dense(36, input_dim = 36, activation = \"relu\"))\n", "\n", " hp_activation = hp.Choice('activation', values=['relu', 'tanh'])\n", " hp_layer_1 = hp.Int('layer_1', min_value=32, max_value=512, step=32)\n", " hp_layer_2 = hp.Int('layer_2', min_value=32, max_value=512, step=32)\n", " hp_layer_3 = hp.Int('layer_3', min_value=32, max_value=512, step=32)\n", " hp_layer_4 = hp.Int('layer_4', min_value=32, max_value=512, step=32)\n", " hp_layer_5 = hp.Int('layer_5', min_value=32, max_value=512, step=32)\n", " hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])\n", "\n", " model.add(Dense(units=hp_layer_1, activation=hp_activation))\n", " model.add(Dense(units=hp_layer_2, activation=hp_activation))\n", " model.add(Dense(units=hp_layer_3, activation=hp_activation))\n", " model.add(Dense(units=hp_layer_4, activation=hp_activation))\n", " model.add(Dense(units=hp_layer_5, activation=hp_activation))\n", " model.add(Dense(2, activation = \"softmax\"))\n", "\n", " model.compile(optimizer=Adam(learning_rate=hp_learning_rate), loss=\"categorical_crossentropy\", metrics = [\"accuracy\"])\n", " \n", " return model" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Trial 30 Complete [00h 00m 51s]\n", "val_accuracy: 0.9973983764648438\n", "\n", "Best val_accuracy So Far: 0.9977235794067383\n", "Total elapsed time: 00h 02m 22s\n", "INFO:tensorflow:Oracle triggered exit\n" ] } ], "source": [ "tuner_7l = kt.Hyperband(\n", " model_7l_builder,\n", " objective='val_accuracy',\n", " max_epochs=10,\n", " directory='keras_tuner_dir',\n", " project_name='keras_tuner_demo_6'\n", ")\n", "tuner_7l.search(x_train, y_train, validation_data=(x_test, y_test), epochs=10, callbacks=[stop_early, TensorBoard(\"./keras_tuner_dir/logs\")])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 192 units, func: \n", "Layer-3: 320 units, func: \n", "Layer-4: 448 units, func: \n", "Layer-5: 224 units, func: \n", "Layer-6: 448 units, func: \n", "Layer-7: 2 units, func: \n", "\n", "Other params:\n", "learning_rate: 0.0001\n", "Epoch 1/100\n", " 1/1230 [..............................] - ETA: 8:04 - loss: 0.6561 - accuracy: 0.6000" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 14:29:10.795739: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - ETA: 0s - loss: 0.0720 - accuracy: 0.9748" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2022-11-23 14:29:24.355056: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0720 - accuracy: 0.9748 - val_loss: 0.0219 - val_accuracy: 0.9941\n", "Epoch 2/100\n", "1230/1230 [==============================] - 15s 12ms/step - loss: 0.0319 - accuracy: 0.9910 - val_loss: 0.0353 - val_accuracy: 0.9893\n", "Epoch 3/100\n", "1230/1230 [==============================] - 15s 12ms/step - loss: 0.0262 - accuracy: 0.9927 - val_loss: 0.0149 - val_accuracy: 0.9958\n", "Epoch 4/100\n", "1230/1230 [==============================] - 15s 12ms/step - loss: 0.0221 - accuracy: 0.9936 - val_loss: 0.0125 - val_accuracy: 0.9964\n", "Epoch 5/100\n", "1230/1230 [==============================] - 15s 12ms/step - loss: 0.0195 - accuracy: 0.9943 - val_loss: 0.0171 - val_accuracy: 0.9951\n", "Epoch 6/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0186 - accuracy: 0.9942 - val_loss: 0.0128 - val_accuracy: 0.9971\n", "Epoch 7/100\n", "1230/1230 [==============================] - 16s 13ms/step - loss: 0.0170 - accuracy: 0.9951 - val_loss: 0.0132 - val_accuracy: 0.9974\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_7l = get_best_model(tuner_7l)\n", "model_7l.fit(x_train, y_train, epochs=100, batch_size=10, validation_data=(x_test, y_test), callbacks=[stop_early])" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "final_models[\"7_layers\"] = model_7l" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.6. Describe final models" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3_layers: Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 448 units, func: \n", "Layer-3: 2 units, func: \n", "\n", "5_layers: Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 160 units, func: \n", "Layer-3: 352 units, func: \n", "Layer-4: 64 units, func: \n", "Layer-5: 2 units, func: \n", "\n", "7_layers_with_dropout: Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 320 units, func: \n", "Layer-3: 0 units, func: None\n", "Layer-4: 96 units, func: \n", "Layer-5: 0 units, func: None\n", "Layer-6: 448 units, func: \n", "Layer-7: 2 units, func: \n", "\n", "7_layers: Describe models architecture\n", "Layer-1: 36 units, func: \n", "Layer-2: 192 units, func: \n", "Layer-3: 320 units, func: \n", "Layer-4: 448 units, func: \n", "Layer-5: 224 units, func: \n", "Layer-6: 448 units, func: \n", "Layer-7: 2 units, func: \n", "\n" ] } ], "source": [ "for name, model in final_models.items():\n", " print(f\"{name}: \", end=\"\")\n", " describe_model(model)\n", " print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Model Evaluation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4.1. Train set evaluation" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelPrecision ScoreRecall ScoreF1 scoreConfusion Matrix
07_layers[0.998, 0.997][0.998, 0.997][0.998, 0.997][[1675, 4], [4, 1392]]
13_layers[0.997, 0.996][0.997, 0.996][0.997, 0.996][[1674, 5], [5, 1391]]
27_layers_with_dropout[0.998, 0.995][0.996, 0.997][0.997, 0.996][[1672, 7], [4, 1392]]
35_layers[0.996, 0.996][0.996, 0.996][0.996, 0.996][[1673, 6], [6, 1390]]
\n", "
" ], "text/plain": [ " Model Precision Score Recall Score F1 score \\\n", "0 7_layers [0.998, 0.997] [0.998, 0.997] [0.998, 0.997] \n", "1 3_layers [0.997, 0.996] [0.997, 0.996] [0.997, 0.996] \n", "2 7_layers_with_dropout [0.998, 0.995] [0.996, 0.997] [0.997, 0.996] \n", "3 5_layers [0.996, 0.996] [0.996, 0.996] [0.996, 0.996] \n", "\n", " Confusion Matrix \n", "0 [[1675, 4], [4, 1392]] \n", "1 [[1674, 5], [5, 1391]] \n", "2 [[1672, 7], [4, 1392]] \n", "3 [[1673, 6], [6, 1390]] " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_set_results = []\n", "\n", "for name, model in final_models.items():\n", " # Evaluate model\n", " predict_x = model.predict(x_test, verbose=False) \n", " y_pred_class = np.argmax(predict_x, axis=1)\n", " y_test_class = np.argmax(y_test, axis=1)\n", "\n", " cm = confusion_matrix(y_test_class, y_pred_class, labels=[0, 1])\n", " (p_score, r_score, f_score, _) = precision_recall_fscore_support(y_test_class, y_pred_class, labels=[0, 1])\n", " \n", " train_set_results.append(( name, round_up_metric_results(p_score), round_up_metric_results(r_score), round_up_metric_results(f_score), cm ))\n", "\n", "train_set_results.sort(key=lambda k: sum(k[3]), reverse=True)\n", "pd.DataFrame(train_set_results, columns=[\"Model\", \"Precision Score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4.2. Test set evaluation" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Headers: ['label', 'nose_x', 'nose_y', 'nose_z', 'nose_v', 'left_shoulder_x', 'left_shoulder_y', 'left_shoulder_z', 'left_shoulder_v', 'right_shoulder_x', 'right_shoulder_y', 'right_shoulder_z', 'right_shoulder_v', 'right_elbow_x', 'right_elbow_y', 'right_elbow_z', 'right_elbow_v', 'left_elbow_x', 'left_elbow_y', 'left_elbow_z', 'left_elbow_v', 'right_wrist_x', 'right_wrist_y', 'right_wrist_z', 'right_wrist_v', 'left_wrist_x', 'left_wrist_y', 'left_wrist_z', 'left_wrist_v', 'left_hip_x', 'left_hip_y', 'left_hip_z', 'left_hip_v', 'right_hip_x', 'right_hip_y', 'right_hip_z', 'right_hip_v']\n", "Number of rows: 604 \n", "Number of columns: 37\n", "\n", "Labels: \n", "C 339\n", "L 265\n", "Name: label, dtype: int64\n", "\n", "Missing values: False\n", "\n", "Duplicate Rows : 0\n" ] } ], "source": [ "# load dataset\n", "test_df = describe_dataset(\"./test.csv\")\n", "\n", "# Categorizing label\n", "test_df.loc[test_df[\"label\"] == \"C\", \"label\"] = 0\n", "test_df.loc[test_df[\"label\"] == \"L\", \"label\"] = 1" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Standard Scaling of features\n", "test_x = test_df.drop(\"label\", axis = 1)\n", "test_x = pd.DataFrame(sc.transform(test_x))\n", "\n", "test_y = test_df[\"label\"]\n", "\n", "# # Converting prediction to categorical\n", "test_y_cat = to_categorical(test_y)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2022-11-25 15:34:28.174069: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n", "2022-11-25 15:34:28.287586: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n", "2022-11-25 15:34:28.423087: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n", "2022-11-25 15:34:28.546100: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:113] Plugin optimizer for device_type GPU is enabled.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelPrecision ScoreRecall ScoreF1 scoreConfusion Matrix
07_layers[0.944, 1.0][1.0, 0.925][0.971, 0.961][[339, 0], [20, 245]]
15_layers[0.926, 1.0][1.0, 0.898][0.962, 0.946][[339, 0], [27, 238]]
27_layers_with_dropout[0.909, 0.963][0.973, 0.875][0.94, 0.917][[330, 9], [33, 232]]
33_layers[0.896, 0.983][0.988, 0.853][0.94, 0.913][[335, 4], [39, 226]]
\n", "
" ], "text/plain": [ " Model Precision Score Recall Score F1 score \\\n", "0 7_layers [0.944, 1.0] [1.0, 0.925] [0.971, 0.961] \n", "1 5_layers [0.926, 1.0] [1.0, 0.898] [0.962, 0.946] \n", "2 7_layers_with_dropout [0.909, 0.963] [0.973, 0.875] [0.94, 0.917] \n", "3 3_layers [0.896, 0.983] [0.988, 0.853] [0.94, 0.913] \n", "\n", " Confusion Matrix \n", "0 [[339, 0], [20, 245]] \n", "1 [[339, 0], [27, 238]] \n", "2 [[330, 9], [33, 232]] \n", "3 [[335, 4], [39, 226]] " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_set_results = []\n", "\n", "for name, model in final_models.items():\n", " # Evaluate model\n", " predict_x = model.predict(test_x, verbose=False) \n", " y_pred_class = np.argmax(predict_x, axis=1)\n", " y_test_class = np.argmax(test_y_cat, axis=1)\n", "\n", " cm = confusion_matrix(y_test_class, y_pred_class, labels=[0, 1])\n", " (p_score, r_score, f_score, _) = precision_recall_fscore_support(y_test_class, y_pred_class, labels=[0, 1])\n", " \n", " test_set_results.append(( name, round_up_metric_results(p_score), round_up_metric_results(r_score), round_up_metric_results(f_score), cm ))\n", "\n", "test_set_results.sort(key=lambda k: k[1] + k[2] + k[3], reverse=True)\n", "pd.DataFrame(test_set_results, columns=[\"Model\", \"Precision Score\", \"Recall Score\", \"F1 score\", \"Confusion Matrix\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. Dumped Model" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: ram://4145d713-d810-484c-b518-b4ae694e4919/assets\n" ] } ], "source": [ "# Dump the best model to a pickle file\n", "with open(\"./model/bicep_dp.pkl\", \"wb\") as f:\n", " pickle.dump(final_models[\"7_layers\"], f)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Assets written to: ram://5ccb0e7f-b3f8-4602-9b3c-2ae89d1a2d69/assets\n", "INFO:tensorflow:Assets written to: ram://5d2d95b4-ff82-487d-bd98-ba859e8eced0/assets\n", "INFO:tensorflow:Assets written to: ram://557449b4-6368-4822-a75f-79675a055ab9/assets\n", "INFO:tensorflow:Assets written to: ram://4857368e-b747-43dd-9b2d-6e986317f4b8/assets\n" ] } ], "source": [ "# Dump final results\n", "with open(\"./model/all_models.pkl\", \"wb\") as f:\n", " pickle.dump(final_models, f)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.8.13 (conda)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "9260f401923fb5c4108c543a7d176de9733d378b3752e49535ad7c43c2271b65" } } }, "nbformat": 4, "nbformat_minor": 2 }