Spaces:

erjonb
/

secom

Sleeping

App Files Files Community

erjonb committed on May 25, 2023

Commit

3f6fe9c

1 Parent(s): 51f4ca9

Upload P2 - Secom Notebook2 - Mercury.ipynb

Browse files

Files changed (1) hide show

P2 - Secom Notebook2 - Mercury.ipynb +159 -143

P2 - Secom Notebook2 - Mercury.ipynb CHANGED Viewed

@@ -26,8 +26,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 357,
-   "metadata": {},
    "outputs": [],
    "source": [
     "# import pandas for data manipulation\n",
@@ -53,14 +57,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 358,
-   "metadata": {},
    "outputs": [
     {
      "data": {
       "application/mercury+json": {
        "allow_download": true,
-       "code_uid": "App.0.40.24.1-randd9fe9ae5",
        "continuous_update": false,
        "description": "Recumpute everything dynamically",
        "full_screen": true,
@@ -92,8 +100,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 359,
-   "metadata": {},
    "outputs": [],
    "source": [
     "# Read the features data from the url of the csv into pandas dataframes and rename the columns to F1, F2, F3, etc.\n",
@@ -129,24 +141,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 360,
-   "metadata": {},
    "outputs": [
     {
      "data": {
       "application/mercury+json": {
-       "code_uid": "Text.0.40.15.11-randec98731b",
        "disabled": false,
        "hidden": false,
        "label": "Test Size Ratio",
-       "model_id": "2157a02ec6544d86bd12bf1e3a15f65e",
        "rows": 1,
        "url_key": "",
        "value": "0.25",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2157a02ec6544d86bd12bf1e3a15f65e",
        "version_major": 2,
        "version_minor": 0
       },
@@ -160,18 +176,18 @@
     {
      "data": {
       "application/mercury+json": {
-       "code_uid": "Text.0.40.15.14-randfa24ca10",
        "disabled": false,
        "hidden": false,
        "label": "Random State Integer",
-       "model_id": "cdaf85c404494bae95a32286425b9034",
        "rows": 1,
        "url_key": "",
        "value": "13",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cdaf85c404494bae95a32286425b9034",
        "version_major": 2,
        "version_minor": 0
       },
@@ -220,8 +236,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 361,
-   "metadata": {},
    "outputs": [],
    "source": [
     "def columns_to_drop(df,drop_duplicates='yes', missing_values_threshold=100, variance_threshold=0, \n",
@@ -302,7 +322,6 @@
     "   global correlation_threshold_var\n",
     "   correlation_threshold_var = correlation_threshold\n",
     "   \n",
-    "   print(type(dropped))\n",
     "   return dropped"
    ]
   },
@@ -320,8 +339,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 362,
-   "metadata": {},
    "outputs": [],
    "source": [
     "def outlier_removal(z_df, z_threshold=4):\n",
@@ -369,8 +392,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 363,
-   "metadata": {},
    "outputs": [],
    "source": [
     "# define a function to scale the dataframe using different scaling models\n",
@@ -444,8 +471,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 364,
-   "metadata": {},
    "outputs": [],
    "source": [
     "# define a function to impute missing values using different imputation models\n",
@@ -529,8 +560,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 365,
-   "metadata": {},
    "outputs": [],
    "source": [
     "def feature_selection(method, X_train, y_train):\n",
@@ -615,8 +650,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 366,
-   "metadata": {},
    "outputs": [],
    "source": [
     "#define a function to oversample or undersample the training set to treat the class imbalance\n",
@@ -632,7 +671,7 @@
     "        sm = SMOTE(random_state=42)\n",
     "        X_train_res, y_train_res = sm.fit_resample(X_train, y_train)\n",
     "        imbalance_report0 = 'Shape of the training set after oversampling with SMOTE: ', X_train_res.shape\n",
-    "        imbalance_report1 = 'Value counts of the target variable after oversampling with SMOTE: \\n', y_train_res.value_counts()\n",
     "        imbalance_var = 'smote'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
@@ -641,7 +680,7 @@
     "        rus = RandomUnderSampler(random_state=42)\n",
     "        X_train_res, y_train_res = rus.fit_resample(X_train, y_train)\n",
     "        imbalance_report0 = 'Shape of the training set after undersampling with RandomUnderSampler: ', X_train_res.shape\n",
-    "        imbalance_report1 = 'Value counts of the target variable after undersampling with RandomUnderSampler: \\n', y_train_res.value_counts()\n",
     "        imbalance_var = 'undersampling'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
@@ -650,7 +689,7 @@
     "        ros = RandomOverSampler(random_state=42)\n",
     "        X_train_res, y_train_res = ros.fit_resample(X_train, y_train)\n",
     "        imbalance_report0 = 'Shape of the training set after oversampling with RandomOverSampler: ', X_train_res.shape\n",
-    "        imbalance_report1 = 'Value counts of the target variable after oversampling with RandomOverSampler: \\n', y_train_res.value_counts()\n",
     "        imbalance_var = 'rose'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
@@ -659,7 +698,7 @@
     "        X_train_res = X_train\n",
     "        y_train_res = y_train\n",
     "        imbalance_report0 = 'Shape of the training set after no resampling: ', X_train_res.shape\n",
-    "        imbalance_report1 = 'Value counts of the target variable after no resampling: \\n', y_train_res.value_counts()\n",
     "        imbalance_var = 'none'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
@@ -684,8 +723,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 367,
-   "metadata": {},
    "outputs": [],
    "source": [
     "# define a function where you can choose the model you want to use to train the data\n",
@@ -757,8 +800,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 368,
-   "metadata": {},
    "outputs": [],
    "source": [
     "evaluation_score_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1-score', 'model_variables'])\n",
@@ -779,15 +826,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 369,
-   "metadata": {},
    "outputs": [],
    "source": [
     "#define a function that prints the strings below\n",
     "def evaluate_models(model='random_forest'):\n",
     "     \n",
-    "    print('--------------------------------------------------')\n",
-    "\n",
     "    all_models = ['random_forest', 'logistic_regression', 'knn', 'svm', 'naive_bayes', 'decision_tree', 'xgboost']\n",
     "    evaluation_score_append = []\n",
     "    evaluation_count_append = []\n",
@@ -882,24 +931,28 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 370,
-   "metadata": {},
    "outputs": [
     {
      "data": {
       "application/mercury+json": {
-       "code_uid": "Text.0.40.15.8-rand4a43baec",
        "disabled": false,
        "hidden": false,
        "label": "Missing Value Threeshold",
-       "model_id": "b2736e53364e4041b6ce10b9e1e1f7d8",
        "rows": 1,
        "url_key": "",
        "value": "50",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b2736e53364e4041b6ce10b9e1e1f7d8",
        "version_major": 2,
        "version_minor": 0
       },
@@ -913,18 +966,18 @@
     {
      "data": {
       "application/mercury+json": {
-       "code_uid": "Text.0.40.15.11-rand6f838484",
        "disabled": false,
        "hidden": false,
        "label": "Variance Threshold",
-       "model_id": "97419c4a49954b8490aa311870d010b9",
        "rows": 1,
        "url_key": "",
        "value": "0.05",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "97419c4a49954b8490aa311870d010b9",
        "version_major": 2,
        "version_minor": 0
       },
@@ -938,18 +991,18 @@
     {
      "data": {
       "application/mercury+json": {
-       "code_uid": "Text.0.40.15.14-rand6243cbfa",
        "disabled": false,
        "hidden": false,
        "label": "Correlation Threshold",
-       "model_id": "e9f072dfb6a241bca69f960fa0aa06a1",
        "rows": 1,
        "url_key": "",
        "value": "0.95",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e9f072dfb6a241bca69f960fa0aa06a1",
        "version_major": 2,
        "version_minor": 0
       },
@@ -969,17 +1022,17 @@
         4,
         5
        ],
-       "code_uid": "Select.0.40.16.18-randa184b437",
        "disabled": false,
        "hidden": false,
        "label": "Outlier Removal Threshold",
-       "model_id": "0be493385a154210b3c7685a3bd1074f",
        "url_key": "",
        "value": 5,
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0be493385a154210b3c7685a3bd1074f",
        "version_major": 2,
        "version_minor": 0
       },
@@ -999,17 +1052,17 @@
         "minmax",
         "robust"
        ],
-       "code_uid": "Select.0.40.16.25-rand163d8992",
        "disabled": false,
        "hidden": false,
        "label": "Scaling Variables",
-       "model_id": "985eab871677416f9c14ea528b0fd561",
        "url_key": "",
        "value": "standard",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "985eab871677416f9c14ea528b0fd561",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1029,17 +1082,17 @@
         "knn",
         "most_frequent"
        ],
-       "code_uid": "Select.0.40.16.29-randb76d7c1d",
        "disabled": false,
        "hidden": false,
        "label": "Imputation Methods",
-       "model_id": "eef6b42e02914c98b7e7ed8d0a18df98",
        "url_key": "",
        "value": "median",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "eef6b42e02914c98b7e7ed8d0a18df98",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1060,17 +1113,17 @@
         "pca",
         "boruta"
        ],
-       "code_uid": "Select.0.40.16.34-rand254bd909",
        "disabled": false,
        "hidden": false,
        "label": "Feature Selection",
-       "model_id": "f4fc58b330a24bfe8699e0602178b0e1",
        "url_key": "",
        "value": "lasso",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f4fc58b330a24bfe8699e0602178b0e1",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1090,17 +1143,17 @@
         "undersampling",
         "rose"
        ],
-       "code_uid": "Select.0.40.16.38-rand75e4d938",
        "disabled": false,
        "hidden": false,
        "label": "Imbalance Treatment",
-       "model_id": "965a81a69265473a830f8eec5e8ba2df",
        "url_key": "",
        "value": "smote",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "965a81a69265473a830f8eec5e8ba2df",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1123,17 +1176,17 @@
         "decision_tree",
         "xgboost"
        ],
-       "code_uid": "Select.0.40.16.42-rand1bbd78ac",
        "disabled": false,
        "hidden": false,
        "label": "Model Selection",
-       "model_id": "0d1b1477e14b44b99d00dc89dffb70cb",
        "url_key": "",
        "value": "random_forest",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0d1b1477e14b44b99d00dc89dffb70cb",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1143,13 +1196,6 @@
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'list'>\n"
-     ]
     }
    ],
    "source": [
@@ -1245,17 +1291,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 371,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "--------------------------------------------------\n"
-     ]
     }
-   ],
    "source": [
     "evaluation_score_output, evaluation_counts_output = evaluate_models(input_model)"
    ]
@@ -1263,62 +1305,31 @@
   {
    "attachments": {},
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "#### **Confusion Matrix**"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 372,
-   "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>Accuracy</th>\n",
-       "      <th>Precision</th>\n",
-       "      <th>Recall</th>\n",
-       "      <th>F1-score</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0.89</td>\n",
-       "      <td>0.15</td>\n",
-       "      <td>0.15</td>\n",
-       "      <td>0.15</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   Accuracy  Precision  Recall  F1-score\n",
-       "0      0.89       0.15    0.15      0.15"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
     },
     {
      "data": {
@@ -1343,29 +1354,36 @@
     "    show_normed=True\n",
     ")\n",
     "\n",
-    "display(evaluation_score_output[['Accuracy', 'Precision', 'Recall', 'F1-score']])"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
-   "metadata": {},
    "source": [
     "### **Transformations Report**"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 373,
-   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "------------------------------------------\n",
       "FEATURE REMOVAL\n",
-      "('Shape of the dataframe is:', (1175, 590))\n",
       "('the number of columns dropped due to duplications is: ', 104)\n",
       "('the number of columns dropped due to missing values is: ', 28)\n",
       "('the number of columns dropped due to low variance is: ', 189)\n",
@@ -1391,7 +1409,7 @@
       "------------------------------------------\n",
       "IMBALANCE TREATMENT\n",
       "('Shape of the training set after oversampling with SMOTE: ', (2194, 14))\n",
-      "('Value counts of the target variable after oversampling with SMOTE: \\n', pass/fail\n",
       "0            1097\n",
       "1            1097\n",
       "dtype: int64)\n"
@@ -1399,9 +1417,7 @@
     }
    ],
    "source": [
-    "print('------------------------------------------')\n",
     "print('FEATURE REMOVAL')\n",
-    "print(feature_removal_report0)\n",
     "print(feature_removal_report1)\n",
     "print(feature_removal_report2)\n",
     "print(feature_removal_report3)\n",

   },
   {
    "cell_type": "code",
+   "execution_count": 431,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "# import pandas for data manipulation\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 432,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [
     {
      "data": {
       "application/mercury+json": {
        "allow_download": true,
+       "code_uid": "App.0.40.24.1-randf68a3764",
        "continuous_update": false,
        "description": "Recumpute everything dynamically",
        "full_screen": true,
   },
   {
    "cell_type": "code",
+   "execution_count": 433,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "# Read the features data from the url of the csv into pandas dataframes and rename the columns to F1, F2, F3, etc.\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 434,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [
     {
      "data": {
       "application/mercury+json": {
+       "code_uid": "Text.0.40.15.11-randa5faa9c1",
        "disabled": false,
        "hidden": false,
        "label": "Test Size Ratio",
+       "model_id": "a2eb64736c1146fc835a6b2afa84c9c8",
        "rows": 1,
        "url_key": "",
        "value": "0.25",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a2eb64736c1146fc835a6b2afa84c9c8",
        "version_major": 2,
        "version_minor": 0
       },
     {
      "data": {
       "application/mercury+json": {
+       "code_uid": "Text.0.40.15.14-rand83abdf01",
        "disabled": false,
        "hidden": false,
        "label": "Random State Integer",
+       "model_id": "7c9d97ed67cb4252a11f2802fc495482",
        "rows": 1,
        "url_key": "",
        "value": "13",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7c9d97ed67cb4252a11f2802fc495482",
        "version_major": 2,
        "version_minor": 0
       },
   },
   {
    "cell_type": "code",
+   "execution_count": 435,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "def columns_to_drop(df,drop_duplicates='yes', missing_values_threshold=100, variance_threshold=0, \n",
     "   global correlation_threshold_var\n",
     "   correlation_threshold_var = correlation_threshold\n",
     "   \n",
     "   return dropped"
    ]
   },
   },
   {
    "cell_type": "code",
+   "execution_count": 436,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "def outlier_removal(z_df, z_threshold=4):\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 437,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "# define a function to scale the dataframe using different scaling models\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 438,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "# define a function to impute missing values using different imputation models\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 439,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "def feature_selection(method, X_train, y_train):\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 440,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "#define a function to oversample or undersample the training set to treat the class imbalance\n",
     "        sm = SMOTE(random_state=42)\n",
     "        X_train_res, y_train_res = sm.fit_resample(X_train, y_train)\n",
     "        imbalance_report0 = 'Shape of the training set after oversampling with SMOTE: ', X_train_res.shape\n",
+    "        imbalance_report1 = 'Value counts of the target variable after oversampling with SMOTE: ', y_train_res.value_counts()\n",
     "        imbalance_var = 'smote'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
     "        rus = RandomUnderSampler(random_state=42)\n",
     "        X_train_res, y_train_res = rus.fit_resample(X_train, y_train)\n",
     "        imbalance_report0 = 'Shape of the training set after undersampling with RandomUnderSampler: ', X_train_res.shape\n",
+    "        imbalance_report1 = 'Value counts of the target variable after undersampling with RandomUnderSampler: ', y_train_res.value_counts()\n",
     "        imbalance_var = 'undersampling'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
     "        ros = RandomOverSampler(random_state=42)\n",
     "        X_train_res, y_train_res = ros.fit_resample(X_train, y_train)\n",
     "        imbalance_report0 = 'Shape of the training set after oversampling with RandomOverSampler: ', X_train_res.shape\n",
+    "        imbalance_report1 = 'Value counts of the target variable after oversampling with RandomOverSampler: ', y_train_res.value_counts()\n",
     "        imbalance_var = 'rose'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
     "        X_train_res = X_train\n",
     "        y_train_res = y_train\n",
     "        imbalance_report0 = 'Shape of the training set after no resampling: ', X_train_res.shape\n",
+    "        imbalance_report1 = 'Value counts of the target variable after no resampling: ', y_train_res.value_counts()\n",
     "        imbalance_var = 'none'\n",
     "        return X_train_res, y_train_res\n",
     "    \n",
   },
   {
    "cell_type": "code",
+   "execution_count": 441,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "# define a function where you can choose the model you want to use to train the data\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 442,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "evaluation_score_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1-score', 'model_variables'])\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 443,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [],
    "source": [
     "#define a function that prints the strings below\n",
     "def evaluate_models(model='random_forest'):\n",
     "     \n",
     "    all_models = ['random_forest', 'logistic_regression', 'knn', 'svm', 'naive_bayes', 'decision_tree', 'xgboost']\n",
     "    evaluation_score_append = []\n",
     "    evaluation_count_append = []\n",
   },
   {
    "cell_type": "code",
+   "execution_count": 444,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "outputs": [
     {
      "data": {
       "application/mercury+json": {
+       "code_uid": "Text.0.40.15.8-rand27c6053f",
        "disabled": false,
        "hidden": false,
        "label": "Missing Value Threeshold",
+       "model_id": "9bf214b16a4342099c9edd6fdda6cca9",
        "rows": 1,
        "url_key": "",
        "value": "50",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9bf214b16a4342099c9edd6fdda6cca9",
        "version_major": 2,
        "version_minor": 0
       },
     {
      "data": {
       "application/mercury+json": {
+       "code_uid": "Text.0.40.15.11-rand5d52d01b",
        "disabled": false,
        "hidden": false,
        "label": "Variance Threshold",
+       "model_id": "98b6b9bb59ec43f1bc6c824e38f4eddd",
        "rows": 1,
        "url_key": "",
        "value": "0.05",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "98b6b9bb59ec43f1bc6c824e38f4eddd",
        "version_major": 2,
        "version_minor": 0
       },
     {
      "data": {
       "application/mercury+json": {
+       "code_uid": "Text.0.40.15.14-randd7d692a8",
        "disabled": false,
        "hidden": false,
        "label": "Correlation Threshold",
+       "model_id": "b4e4bb3cc6414fcaa12c01b283081d96",
        "rows": 1,
        "url_key": "",
        "value": "0.95",
        "widget": "Text"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "b4e4bb3cc6414fcaa12c01b283081d96",
        "version_major": 2,
        "version_minor": 0
       },
         4,
         5
        ],
+       "code_uid": "Select.0.40.16.18-rand6188731c",
        "disabled": false,
        "hidden": false,
        "label": "Outlier Removal Threshold",
+       "model_id": "48828625c53c4fe9ae8ad3abdab7bca6",
        "url_key": "",
        "value": 5,
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "48828625c53c4fe9ae8ad3abdab7bca6",
        "version_major": 2,
        "version_minor": 0
       },
         "minmax",
         "robust"
        ],
+       "code_uid": "Select.0.40.16.25-rand4ff0ac92",
        "disabled": false,
        "hidden": false,
        "label": "Scaling Variables",
+       "model_id": "4268185d86f34c559e1444de3c1739d9",
        "url_key": "",
        "value": "standard",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4268185d86f34c559e1444de3c1739d9",
        "version_major": 2,
        "version_minor": 0
       },
         "knn",
         "most_frequent"
        ],
+       "code_uid": "Select.0.40.16.29-rand9bb317f9",
        "disabled": false,
        "hidden": false,
        "label": "Imputation Methods",
+       "model_id": "a147c118c8f14de28b280232786f146a",
        "url_key": "",
        "value": "median",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a147c118c8f14de28b280232786f146a",
        "version_major": 2,
        "version_minor": 0
       },
         "pca",
         "boruta"
        ],
+       "code_uid": "Select.0.40.16.34-rand7cda1892",
        "disabled": false,
        "hidden": false,
        "label": "Feature Selection",
+       "model_id": "ed31020a12d842a9b6e77a88344adfd6",
        "url_key": "",
        "value": "lasso",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ed31020a12d842a9b6e77a88344adfd6",
        "version_major": 2,
        "version_minor": 0
       },
         "undersampling",
         "rose"
        ],
+       "code_uid": "Select.0.40.16.38-randc6301b14",
        "disabled": false,
        "hidden": false,
        "label": "Imbalance Treatment",
+       "model_id": "ef37d1810f974d2081c0cd9bed1d4384",
        "url_key": "",
        "value": "smote",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ef37d1810f974d2081c0cd9bed1d4384",
        "version_major": 2,
        "version_minor": 0
       },
         "decision_tree",
         "xgboost"
        ],
+       "code_uid": "Select.0.40.16.42-randce0898a7",
        "disabled": false,
        "hidden": false,
        "label": "Model Selection",
+       "model_id": "02c163a5f04e4dde8adda8eb149814d0",
        "url_key": "",
        "value": "random_forest",
        "widget": "Select"
       },
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "02c163a5f04e4dde8adda8eb149814d0",
        "version_major": 2,
        "version_minor": 0
       },
      },
      "metadata": {},
      "output_type": "display_data"
     }
    ],
    "source": [
   },
   {
    "cell_type": "code",
+   "execution_count": 445,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
     }
+   },
+   "outputs": [],
    "source": [
     "evaluation_score_output, evaluation_counts_output = evaluate_models(input_model)"
    ]
   {
    "attachments": {},
    "cell_type": "markdown",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "source": [
     "#### **Confusion Matrix**"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 446,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
    "outputs": [
     {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   Accuracy  Precision  Recall  F1-score\n",
+      "0      0.89       0.15    0.15      0.15\n"
+     ]
     },
     {
      "data": {
     "    show_normed=True\n",
     ")\n",
     "\n",
+    "print(evaluation_score_output[['Accuracy', 'Precision', 'Recall', 'F1-score']])\n",
+    "plt.show()"
    ]
   },
   {
    "attachments": {},
    "cell_type": "markdown",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
    "source": [
     "### **Transformations Report**"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 447,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "FEATURE REMOVAL\n",
       "('the number of columns dropped due to duplications is: ', 104)\n",
       "('the number of columns dropped due to missing values is: ', 28)\n",
       "('the number of columns dropped due to low variance is: ', 189)\n",
       "------------------------------------------\n",
       "IMBALANCE TREATMENT\n",
       "('Shape of the training set after oversampling with SMOTE: ', (2194, 14))\n",
+      "('Value counts of the target variable after oversampling with SMOTE: ', pass/fail\n",
       "0            1097\n",
       "1            1097\n",
       "dtype: int64)\n"
     }
    ],
    "source": [
     "print('FEATURE REMOVAL')\n",
     "print(feature_removal_report1)\n",
     "print(feature_removal_report2)\n",
     "print(feature_removal_report3)\n",