{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "# **Classifying products in Semiconductor Industry**" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Import the data**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "import mercury as mr\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from scipy import stats\n", "from sklearn.model_selection import train_test_split\n", "\n", "from mlxtend.plotting import plot_confusion_matrix\n", "from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score\n", "from mlxtend.plotting import plot_confusion_matrix" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [ { "data": { "application/mercury+json": { "allow_download": true, "code_uid": "App.0.40.24.1-rand53016c34", "continuous_update": false, "description": "Recumpute everything dynamically", "full_screen": true, "model_id": "mercury-app", "notify": "{}", "output": "app", "schedule": "", "show_code": false, "show_prompt": false, "show_sidebar": true, "static_notebook": false, "title": "Secom Web App Demo", "widget": "App" }, "text/html": [ "

Mercury Application

This output won't appear in the web app." ], "text/plain": [ "mercury.App" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "app = mr.App(title=\"Secom Web App Demo\", description=\"Recumpute everything dynamically\", continuous_update=False)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# Read the features data from the the url of csv into pandas dataframes and rename the columns to F1, F2, F3, etc.\n", "# Read the labels data from the url of csv into pandas dataframes and rename the columns to pass/fail and date/time\n", "\n", "#url_data = 'https://archive.ics.uci.edu/ml/machine-learning-databases/secom/secom.data'\n", "#url_labels = 'https://archive.ics.uci.edu/ml/machine-learning-databases/secom/secom_labels.data'\n", "\n", "url_data = 'secom_data.csv'\n", "url_labels = 'secom_labels.csv'\n", "\n", "features = pd.read_csv(url_data, delimiter=' ', header=None)\n", "labels = pd.read_csv(url_labels, delimiter=' ', names=['pass/fail', 'date_time'])\n", "\n", "prefix = 'F'\n", "new_column_names = [prefix + str(i) for i in range(1, len(features.columns)+1)]\n", "features.columns = new_column_names\n", "\n", "labels['pass/fail'] = labels['pass/fail'].replace({-1: 0, 1: 1})\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Split the data**" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [ { "data": { "application/mercury+json": { "code_uid": "Text.0.40.15.11-rand81961de4", "disabled": false, "hidden": false, "label": "Test Size Ratio", "model_id": "cddcc5c10139484dbc19e59ce26f012c", "rows": 1, "url_key": "", "value": "0.25", "widget": "Text" }, "application/vnd.jupyter.widget-view+json": { "model_id": "cddcc5c10139484dbc19e59ce26f012c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Text" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "code_uid": "Text.0.40.15.14-rand72283006", "disabled": false, "hidden": false, "label": "Random State Integer", "model_id": "dcac3f415e624b61aac8a3578e285bca", "rows": 1, "url_key": "", "value": "13", "widget": "Text" }, "application/vnd.jupyter.widget-view+json": { "model_id": "dcac3f415e624b61aac8a3578e285bca", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Text" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# if there is a date/time column, drop it from the features and labels dataframes, else continue\n", "\n", "if 'date_time' in labels.columns:\n", " labels = labels.drop(['date_time'], axis=1)\n", "\n", "\n", "# Split the dataset and the labels into training and testing sets\n", "# use stratify to ensure that the training and testing sets have the same percentage of pass and fail labels\n", "# use random_state to ensure that the same random split is generated each time the code is run\n", "\n", "test_size_num = mr.Text(label=\"Test Size Ratio\", value='0.25') # \n", "test_size_num = float(test_size_num.value)\n", "\n", "random_state_num = mr.Text(label=\"Random State Integer\", value='13') # \n", "random_state_num = int(random_state_num.value)\n", "\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " features, labels, test_size = test_size_num, stratify=labels, random_state=random_state_num)\n", "\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Feature Removal**" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "def columns_to_drop(df,drop_duplicates='yes', missing_values_threshold=100, variance_threshold=0, \n", " correlation_threshold=1.1):\n", " \n", " global feature_removal_report0\n", " global feature_removal_report1\n", " global feature_removal_report2\n", " global feature_removal_report3\n", " global feature_removal_report4\n", " global feature_removal_report5\n", " global feature_removal_report6\n", " \n", " \n", " feature_removal_report0 = 'Shape of the dataframe is:' , df.shape\n", "\n", " # Drop duplicated columns\n", " if drop_duplicates == 'yes':\n", " new_column_names = df.columns\n", " df = df.T.drop_duplicates().T\n", " feature_removal_report1 = 'the number of columns dropped due to duplications is: ', len(new_column_names) - len(df.columns)\n", " drop_duplicated = list(set(new_column_names) - set(df.columns))\n", "\n", " elif drop_duplicates == 'no':\n", " df = df.T.T\n", " feature_removal_report1 = 'No columns were dropped due to duplications' \n", "\n", " # Print the percentage of columns in df with missing values more than or equal to threshold\n", " feature_removal_report2 = 'the number of columns dropped due to missing values is: ', len(df.isnull().mean()[df.isnull().mean() > missing_values_threshold/100].index)\n", " \n", " # Print into a list the columns to be dropped due to missing values\n", " drop_missing = list(df.isnull().mean()[df.isnull().mean() > missing_values_threshold/100].index)\n", "\n", " # Drop columns with more than or equal to threshold missing values from df\n", " df.drop(drop_missing, axis=1, inplace=True)\n", " \n", " # Print the number of columns in df with variance less than threshold\n", " feature_removal_report3 = 'the number of columns dropped due to low variance is: ', len(df.var()[df.var() <= variance_threshold].index)\n", "\n", " # Print into a list the columns to be dropped due to low variance\n", " drop_variance = list(df.var()[df.var() <= variance_threshold].index)\n", "\n", " # Drop columns with more than or equal to threshold variance from df\n", " df.drop(drop_variance, axis=1, inplace=True)\n", "\n", " # Print the number of columns in df with more than or equal to threshold correlation\n", " \n", " # Create correlation matrix and round it to 4 decimal places\n", " corr_matrix = df.corr().abs().round(4)\n", " upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))\n", " to_drop = [column for column in upper.columns if any(upper[column] >= correlation_threshold)]\n", " feature_removal_report4 = 'the number of columns dropped due to high correlation is: ', len(to_drop)\n", "\n", " # Print into a list the columns to be dropped due to high correlation\n", " drop_correlation = [column for column in upper.columns if any(upper[column] >= correlation_threshold)]\n", "\n", " # Drop columns with more than or equal to threshold correlation from df\n", " df.drop(to_drop, axis=1, inplace=True)\n", " \n", " if drop_duplicates == 'yes':\n", " dropped = (drop_duplicated+drop_missing+drop_variance+drop_correlation)\n", "\n", " elif drop_duplicates =='no':\n", " dropped = (drop_missing+drop_variance+drop_correlation)\n", " \n", " feature_removal_report5 = 'Total number of columns to be dropped is: ', len(dropped)\n", " feature_removal_report6 = 'New shape of the dataframe is: ', df.shape\n", "\n", " global drop_duplicates_var\n", " drop_duplicates_var = drop_duplicates\n", " \n", " global missing_values_threshold_var\n", " missing_values_threshold_var = missing_values_threshold\n", "\n", " global variance_threshold_var\n", " variance_threshold_var = variance_threshold\n", "\n", " global correlation_threshold_var\n", " correlation_threshold_var = correlation_threshold\n", " \n", " return dropped" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Outlier Removal**" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "def outlier_removal(z_df, action = 'ignore', z_threshold=3):\n", " \n", " global outlier_var\n", " global outlier_removal_report0\n", " global outlier_removal_report1\n", "\n", " if action == 'ignore':\n", " outlier_removal_report0 = 'No z-score threshold was selected'\n", " outlier_var = 'none'\n", " z_df_copy = z_df.copy()\n", " outlier_removal_report1 = 'No outliers were removed from the dataset'\n", " \n", " if action == 'remove':\n", " \n", " outlier_removal_report0 = 'The z-score threshold is:', z_threshold\n", "\n", " z_df_copy = z_df.copy()\n", "\n", " z_scores = np.abs(stats.zscore(z_df_copy))\n", "\n", " # Identify the outliers in the dataset using the z-score method\n", " outliers_mask = z_scores > z_threshold\n", " z_df_copy[outliers_mask] = np.nan\n", "\n", " outliers_count = np.count_nonzero(outliers_mask)\n", " outlier_removal_report1 = 'The number of outliers removed from the dataset is:', outliers_count\n", "\n", " outlier_var = z_threshold\n", "\n", " if action == 'push':\n", "\n", " # push the outliers to the threshold value\n", " outlier_removal_report0 = 'The z-score threshold is:', z_threshold\n", "\n", " z_df_copy = z_df.copy()\n", "\n", " z_scores = np.abs(stats.zscore(z_df_copy))\n", "\n", " # Identify the outliers in the dataset using the z-score method\n", " outliers_mask = z_scores > z_threshold\n", " z_df_copy[outliers_mask] = np.sign(z_df_copy[outliers_mask]) * (3 * np.std(z_df_copy)) + np.mean(z_df_copy)\n", "\n", " outliers_count = np.count_nonzero(outliers_mask)\n", " outlier_removal_report1 = 'The number of outliers pushed to the boundaries is:', outliers_count\n", "\n", " outlier_var = str(action) + '-' + str(z_threshold) + 's'\n", "\n", " \n", " return z_df_copy" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Scaling Methods**" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# define a function to scale the dataframe using different scaling models\n", "\n", "def scale_dataframe(scale_model,df_fit, df_transform):\n", " \n", " global scale_model_var\n", " global scaling_report0\n", "\n", " if scale_model == 'robust':\n", " from sklearn.preprocessing import RobustScaler\n", " scaler = RobustScaler()\n", " scaler.fit(df_fit)\n", " df_scaled = scaler.transform(df_transform)\n", " df_scaled = pd.DataFrame(df_scaled, columns=df_transform.columns)\n", " scaling_report0 = 'The dataframe has been scaled using the robust scaling model'\n", " scale_model_var = 'robust'\n", " return df_scaled\n", " \n", " elif scale_model == 'standard':\n", " from sklearn.preprocessing import StandardScaler\n", " scaler = StandardScaler()\n", " scaler.fit(df_fit)\n", " df_scaled = scaler.transform(df_transform)\n", " df_scaled = pd.DataFrame(df_scaled, columns=df_transform.columns)\n", " scaling_report0 = 'The dataframe has been scaled using the standard scaling model'\n", " scale_model_var = 'standard'\n", " return df_scaled\n", " \n", " elif scale_model == 'normal':\n", " from sklearn.preprocessing import Normalizer\n", " scaler = Normalizer()\n", " scaler.fit(df_fit)\n", " df_scaled = scaler.transform(df_transform)\n", " df_scaled = pd.DataFrame(df_scaled, columns=df_transform.columns)\n", " scaling_report0 = 'The dataframe has been scaled using the normal scaling model'\n", " scale_model_var = 'normal'\n", " return df_scaled\n", " \n", " elif scale_model == 'minmax':\n", " from sklearn.preprocessing import MinMaxScaler\n", " scaler = MinMaxScaler()\n", " scaler.fit(df_fit)\n", " df_scaled = scaler.transform(df_transform)\n", " df_scaled = pd.DataFrame(df_scaled, columns=df_transform.columns)\n", " scaling_report0 = 'The dataframe has been scaled using the minmax scaling model'\n", " scale_model_var = 'minmax'\n", " return df_scaled\n", " \n", " elif scale_model == 'none':\n", " scaling_report0 = 'The dataframe has not been scaled'\n", " scale_model_var = 'none'\n", " return df_transform\n", " \n", " else:\n", " print('Please choose a valid scaling model: robust, standard, normal, or minmax')\n", " return None" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Missing Value Imputation**" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# define a function to impute missing values using different imputation models\n", "\n", "def impute_missing_values(imputation, df_fit, df_transform, n_neighbors=5):\n", "\n", " global imputation_var\n", " global imputation_report0\n", " global imputation_report1\n", " global imputation_report2\n", " \n", "\n", " imputation_report0 = 'Number of missing values before imputation: ', df_transform.isnull().sum().sum()\n", "\n", "\n", " if imputation == 'knn':\n", "\n", " from sklearn.impute import KNNImputer\n", " imputer = KNNImputer(n_neighbors=n_neighbors)\n", " imputer.fit(df_fit)\n", " df_imputed = imputer.transform(df_transform)\n", " df_imputed = pd.DataFrame(df_imputed, columns=df_transform.columns)\n", " imputation_report1 = 'knn imputation has been applied' \n", " imputation_report2 = 'Number of missing values after imputation: ', df_imputed.isnull().sum().sum()\n", " imputation_var = 'knn'\n", " return df_imputed\n", " \n", " elif imputation == 'mean':\n", "\n", " from sklearn.impute import SimpleImputer\n", " imputer = SimpleImputer(strategy='mean')\n", " imputer.fit(df_fit)\n", " df_imputed = imputer.transform(df_transform)\n", " df_imputed = pd.DataFrame(df_imputed, columns=df_transform.columns)\n", " imputation_report1 = 'mean imputation has been applied'\n", " imputation_report2 = 'Number of missing values after imputation: ', df_imputed.isnull().sum().sum()\n", " imputation_var = 'mean'\n", " return df_imputed\n", " \n", " elif imputation == 'median':\n", "\n", " from sklearn.impute import SimpleImputer\n", " imputer = SimpleImputer(strategy='median')\n", " imputer.fit(df_fit)\n", " df_imputed = imputer.transform(df_transform)\n", " df_imputed = pd.DataFrame(df_imputed, columns=df_transform.columns)\n", " imputation_report1 = 'median imputation has been applied'\n", " imputation_report2 = 'Number of missing values after imputation: ', df_imputed.isnull().sum().sum()\n", " imputation_var = 'median'\n", " return df_imputed\n", " \n", " elif imputation == 'most_frequent':\n", " \n", " from sklearn.impute import SimpleImputer\n", " imputer = SimpleImputer(strategy='most_frequent')\n", " imputer.fit(df_fit)\n", " df_imputed = imputer.transform(df_transform)\n", " df_imputed = pd.DataFrame(df_imputed, columns=df_transform.columns)\n", " imputation_report1 = 'most frequent imputation has been applied'\n", " imputation_report2 = 'Number of missing values after imputation: ', df_imputed.isnull().sum().sum()\n", " imputation_var = 'most_frequent'\n", " return df_imputed\n", " \n", " else:\n", " print('Please choose an imputation model from the following: knn, mean, median, most_frequent')\n", " df_imputed = df_transform.copy()\n", " return df_imputed\n" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Feature Reduction / Selection**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "def feature_selection(method, X_train, y_train):\n", "\n", " global feature_selection_var\n", " global selected_features\n", " \n", " global feature_selection_report0\n", " global feature_selection_report1\n", "\n", "\n", " # if method is boruta, run boruta feature selection and return the selected features and the training set with only the selected features\n", "\n", " if method == 'boruta':\n", " feature_selection_report0 = 'Selected method is: ', method\n", " from boruta import BorutaPy\n", " from sklearn.ensemble import RandomForestClassifier\n", " rf = RandomForestClassifier(n_estimators=100, n_jobs=-1)\n", " boruta_selector = BorutaPy(rf,n_estimators='auto', verbose=0, random_state=42)\n", " boruta_selector.fit(X_train.values, y_train.values.ravel())\n", " selected_feature_indices = boruta_selector.support_\n", " selected_columns = X_train.columns[selected_feature_indices]\n", " X_train_filtered = X_train.iloc[:, selected_feature_indices]\n", " feature_selection_report1 = 'Shape of the training set after feature selection with Boruta: ', X_train_filtered.shape\n", " return X_train_filtered, selected_columns\n", " \n", " if method == 'none':\n", " feature_selection_report0 = 'No feature selection has been applied'\n", " X_train_filtered = X_train\n", " feature_selection_report1 = 'Shape of the training set after no feature selection: ', X_train_filtered.shape\n", " feature_selection_var = 'none'\n", " selected_features = X_train_filtered.columns\n", " return X_train_filtered, selected_features \n", " \n", " if method == 'lasso':\n", " feature_selection_report0 = 'Selected method is: ', method\n", " from sklearn.linear_model import LassoCV\n", " from sklearn.feature_selection import SelectFromModel\n", " lasso = LassoCV().fit(X_train, y_train)\n", " model = SelectFromModel(lasso, prefit=True)\n", " X_train_filtered = model.transform(X_train)\n", " selected_features = X_train.columns[model.get_support()]\n", " feature_selection_report1 = 'Shape of the training set after feature selection with LassoCV: ', X_train_filtered.shape\n", " feature_selection_var = 'lasso'\n", " return X_train_filtered, selected_features\n", " \n", " if method == 'pca':\n", " feature_selection_report0 = 'Selected method is: ', method\n", " from sklearn.decomposition import PCA\n", " pca = PCA(n_components=15)\n", " X_train_pca = pca.fit_transform(X_train)\n", " selected_features = X_train.columns[pca.explained_variance_ratio_.argsort()[::-1]][:15]\n", " feature_selection_report1 = 'Shape of the training set after feature selection with PCA: ', X_train_pca.shape\n", " feature_selection_var = 'pca'\n", " return X_train_pca, selected_features\n", " \n", " if method == 'rfe':\n", " feature_selection_report0 = 'Selected method is: ', method\n", " from sklearn.feature_selection import RFE\n", " from sklearn.ensemble import RandomForestClassifier\n", " rfe_selector = RFE(estimator=RandomForestClassifier(n_estimators=100, n_jobs=-1), n_features_to_select=15, step=10, verbose=0)\n", " rfe_selector.fit(X_train, y_train)\n", " selected_features = X_train.columns[rfe_selector.support_]\n", " X_train_filtered = X_train.iloc[:, rfe_selector.support_]\n", " feature_selection_report1 = 'Shape of the training set after feature selection with RFE: ', X_train_filtered.shape\n", " feature_selection_var = 'rfe'\n", " return X_train_filtered, selected_features\n", " " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Imbalance Treatment**" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "#define a function to oversample and understamble the imbalance in the training set\n", "\n", "def imbalance_treatment(method, X_train, y_train):\n", "\n", " global imbalance_var\n", " global imbalance_report0\n", " global imbalance_report1\n", " global imbalance_report2\n", " \n", "\n", "\n", " if method == 'smote': \n", " from imblearn.over_sampling import SMOTE\n", " sm = SMOTE(random_state=42)\n", " X_train_res, y_train_res = sm.fit_resample(X_train, y_train)\n", " imbalance_report0 = 'Shape of the training set after oversampling with SMOTE: ', X_train_res.shape\n", " imbalance_report1 = 'Value counts of the target variable after oversampling with SMOTE: '\n", " imbalance_report2 = y_train_res.value_counts()\n", " imbalance_var = 'smote'\n", " return X_train_res, y_train_res\n", " \n", " if method == 'undersampling':\n", " from imblearn.under_sampling import RandomUnderSampler\n", " rus = RandomUnderSampler(random_state=42)\n", " X_train_res, y_train_res = rus.fit_resample(X_train, y_train)\n", " imbalance_report0 = 'Shape of the training set after undersampling with RandomUnderSampler: ', X_train_res.shape\n", " imbalance_report1 = 'Value counts of the target variable after undersampling with RandomUnderSampler: '\n", " imbalance_report2 = y_train_res.value_counts()\n", " imbalance_var = 'undersampling'\n", " return X_train_res, y_train_res\n", " \n", " if method == 'rose':\n", " from imblearn.over_sampling import RandomOverSampler\n", " ros = RandomOverSampler(random_state=42)\n", " X_train_res, y_train_res = ros.fit_resample(X_train, y_train)\n", " imbalance_report0 = 'Shape of the training set after oversampling with RandomOverSampler: ', X_train_res.shape\n", " imbalance_report1 = 'Value counts of the target variable after oversampling with RandomOverSampler: '\n", " imbalance_report2 = y_train_res.value_counts()\n", " imbalance_var = 'rose'\n", " return X_train_res, y_train_res\n", " \n", " \n", " if method == 'none':\n", " X_train_res = X_train\n", " y_train_res = y_train\n", " imbalance_report0 = 'Shape of the training set after no resampling: ', X_train_res.shape\n", " imbalance_report1 = 'Value counts of the target variable after no resampling: '\n", " imbalance_report2 = y_train_res.value_counts()\n", " imbalance_var = 'none'\n", " return X_train_res, y_train_res\n", " \n", " else:\n", " print('Please choose a valid resampling method: smote, rose, undersampling or none')\n", " X_train_res = X_train\n", " y_train_res = y_train\n", " return X_train_res, y_train_res\n", " \n", " " ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Training Models**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "# define a function where you can choose the model you want to use to train the data\n", "\n", "def train_model(model, X_train, y_train, X_test, y_test):\n", "\n", " global model_var\n", "\n", " if model == 'random_forest':\n", " from sklearn.ensemble import RandomForestClassifier\n", " rfc = RandomForestClassifier(n_estimators=100, random_state=13)\n", " rfc.fit(X_train, y_train)\n", " y_pred = rfc.predict(X_test)\n", " model_var = 'random_forest'\n", " return y_pred\n", "\n", " if model == 'logistic_regression':\n", " from sklearn.linear_model import LogisticRegression\n", " lr = LogisticRegression()\n", " lr.fit(X_train, y_train)\n", " y_pred = lr.predict(X_test)\n", " model_var = 'logistic_regression'\n", " return y_pred\n", " \n", " if model == 'knn':\n", " from sklearn.neighbors import KNeighborsClassifier\n", " knn = KNeighborsClassifier(n_neighbors=5)\n", " knn.fit(X_train, y_train)\n", " y_pred = knn.predict(X_test)\n", " model_var = 'knn'\n", " return y_pred\n", " \n", " if model == 'svm':\n", " from sklearn.svm import SVC\n", " svm = SVC()\n", " svm.fit(X_train, y_train)\n", " y_pred = svm.predict(X_test)\n", " model_var = 'svm'\n", " return y_pred\n", " \n", " if model == 'naive_bayes':\n", " from sklearn.naive_bayes import GaussianNB\n", " nb = GaussianNB()\n", " nb.fit(X_train, y_train)\n", " y_pred = nb.predict(X_test)\n", " model_var = 'naive_bayes'\n", " return y_pred\n", " \n", " if model == 'decision_tree':\n", " from sklearn.tree import DecisionTreeClassifier\n", " dt = DecisionTreeClassifier()\n", " dt.fit(X_train, y_train)\n", " y_pred = dt.predict(X_test)\n", " model_var = 'decision_tree'\n", " return y_pred\n", " \n", " if model == 'xgboost':\n", " from xgboost import XGBClassifier\n", " xgb = XGBClassifier()\n", " xgb.fit(X_train, y_train)\n", " y_pred = xgb.predict(X_test)\n", " model_var = 'xgboost'\n", " return y_pred\n", " \n", " else:\n", " print('Please choose a model from the following: random_forest, logistic_regression, knn, svm, naive_bayes, decision_tree, xgboost')\n", " return None" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "evaluation_score_df = pd.DataFrame(columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1-score', 'model_variables'])\n", "evaluation_count_df = pd.DataFrame(columns=['Model', 'True Negatives', 'False Positives', 'False Negatives', 'True Positives', 'model_variables'])" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Evaluation Function**" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "#define a function that prints the strings below\n", "def evaluate_models(model='random_forest'):\n", " \n", " all_models = ['random_forest', 'logistic_regression', 'knn', 'svm', 'naive_bayes', 'decision_tree', 'xgboost']\n", " evaluation_score_append = []\n", " evaluation_count_append = []\n", " \n", " for selected_model in all_models:\n", " \n", " if model == 'all' or model == selected_model:\n", "\n", " evaluation_score = []\n", " evaluation_count = []\n", "\n", " y_pred = globals()['y_pred_' + selected_model] # Get the prediction variable dynamically\n", "\n", " def namestr(obj, namespace):\n", " return [name for name in namespace if namespace[name] is obj]\n", "\n", " model_name = namestr(y_pred, globals())[0]\n", " model_name = model_name.replace('y_pred_', '') \n", "\n", " cm = confusion_matrix(y_test, y_pred)\n", "\n", " # create a dataframe with the results for each model\n", "\n", " evaluation_score.append(model_name)\n", " evaluation_score.append(round(accuracy_score(y_test, y_pred), 2))\n", " evaluation_score.append(round(precision_score(y_test, y_pred, zero_division=0), 2))\n", " evaluation_score.append(round(recall_score(y_test, y_pred), 2))\n", " evaluation_score.append(round(f1_score(y_test, y_pred), 2))\n", " evaluation_score_append.append(evaluation_score)\n", "\n", "\n", " # create a dataframe with the true positives, true negatives, false positives and false negatives for each model\n", "\n", " evaluation_count.append(model_name)\n", " evaluation_count.append(cm[0][0])\n", " evaluation_count.append(cm[0][1])\n", " evaluation_count.append(cm[1][0])\n", " evaluation_count.append(cm[1][1])\n", " evaluation_count_append.append(evaluation_count)\n", "\n", " \n", " evaluation_score_append = pd.DataFrame(evaluation_score_append, \n", " columns=['Model', 'Accuracy', 'Precision', 'Recall', 'F1-score'])\n", " \n", " evaluation_score_append['drop duplicates'] = drop_duplicates_var\n", " evaluation_score_append['missing values th'] = missing_values_threshold_var\n", " evaluation_score_append['variance th'] = variance_threshold_var\n", " evaluation_score_append['correlation th'] = correlation_threshold_var\n", " evaluation_score_append['outlier removal th'] = outlier_var\n", " evaluation_score_append['scaling method'] = scale_model_var\n", " evaluation_score_append['imputation method'] = imputation_var\n", " evaluation_score_append['feature selection'] = feature_selection_var\n", " evaluation_score_append['imbalance treatment'] = imbalance_var\n", "\n", "\n", " evaluation_score_append['model_variables'] = drop_duplicates_var + '_' + str(missing_values_threshold_var) + '_' + str(\n", " variance_threshold_var) + '_' + str(correlation_threshold_var) + '_' + str(\n", " outlier_var) + '_' + scale_model_var + '_' + imputation_var + '_' + feature_selection_var + '_' + imbalance_var\n", " \n", "\n", " evaluation_count_append = pd.DataFrame(evaluation_count_append,\n", " columns=['Model', 'True Negatives', 'False Positives', 'False Negatives', 'True Positives'])\n", " \n", " evaluation_count_append['drop duplicates'] = drop_duplicates_var\n", " evaluation_count_append['missing values th'] = missing_values_threshold_var\n", " evaluation_count_append['variance th'] = variance_threshold_var\n", " evaluation_count_append['correlation th'] = correlation_threshold_var\n", " evaluation_count_append['outlier removal th'] = outlier_var\n", " evaluation_count_append['scaling method'] = scale_model_var\n", " evaluation_count_append['imputation method'] = imputation_var\n", " evaluation_count_append['feature selection'] = feature_selection_var\n", " evaluation_count_append['imbalance treatment'] = imbalance_var\n", " \n", " evaluation_count_append['model_variables'] = drop_duplicates_var + '_' + str(missing_values_threshold_var) + '_' + str(\n", " variance_threshold_var) + '_' + str(correlation_threshold_var) + '_' + str(\n", " outlier_var) + '_' + scale_model_var + '_' + imputation_var + '_' + feature_selection_var + '_' + imbalance_var\n", " \n", " return evaluation_score_append, evaluation_count_append" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "### **Input Variables**" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [ { "data": { "application/mercury+json": { "code_uid": "Text.0.40.15.8-randd265d777", "disabled": false, "hidden": false, "label": "Missing Value Threeshold", "model_id": "aec705cdd896483f9d28d01d5e488a64", "rows": 1, "url_key": "", "value": "50", "widget": "Text" }, "application/vnd.jupyter.widget-view+json": { "model_id": "aec705cdd896483f9d28d01d5e488a64", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Text" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "code_uid": "Text.0.40.15.11-rand7024cef0", "disabled": false, "hidden": false, "label": "Variance Threshold", "model_id": "19c44ea2727948d09fa75e29c62844d8", "rows": 1, "url_key": "", "value": "0.05", "widget": "Text" }, "application/vnd.jupyter.widget-view+json": { "model_id": "19c44ea2727948d09fa75e29c62844d8", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Text" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "code_uid": "Text.0.40.15.14-randa98919fc", "disabled": false, "hidden": false, "label": "Correlation Threshold", "model_id": "dd6d45837f4b45d3a4d58e9821188473", "rows": 1, "url_key": "", "value": "0.95", "widget": "Text" }, "application/vnd.jupyter.widget-view+json": { "model_id": "dd6d45837f4b45d3a4d58e9821188473", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Text" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "choices": [ "ignore", "remove", "push" ], "code_uid": "Select.0.40.16.19-rand82f0f224", "disabled": false, "hidden": false, "label": "Outlier Action", "model_id": "021d7973917845d4842f11c6d9f2fc67", "url_key": "", "value": "ignore", "widget": "Select" }, "application/vnd.jupyter.widget-view+json": { "model_id": "021d7973917845d4842f11c6d9f2fc67", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Select" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "choices": [ "none", 3, 4, 5 ], "code_uid": "Select.0.40.16.22-rand043a8dbf", "disabled": false, "hidden": false, "label": "Outlier Action Threshold", "model_id": "e0e42c0aef7845838da1aa1c70828482", "url_key": "", "value": "none", "widget": "Select" }, "application/vnd.jupyter.widget-view+json": { "model_id": "e0e42c0aef7845838da1aa1c70828482", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Select" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "choices": [ "none", "standard", "minmax", "robust" ], "code_uid": "Select.0.40.16.29-rand05477265", "disabled": false, "hidden": false, "label": "Scaling Variables", "model_id": "3938af66d22649bfb1cdaa77c4fb9684", "url_key": "", "value": "none", "widget": "Select" }, "application/vnd.jupyter.widget-view+json": { "model_id": "3938af66d22649bfb1cdaa77c4fb9684", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Select" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "choices": [ "mean", "median", "knn", "most_frequent" ], "code_uid": "Select.0.40.16.33-randade919ed", "disabled": false, "hidden": false, "label": "Imputation Methods", "model_id": "9367b36a66f148d2acd012c4f1c4fd71", "url_key": "", "value": "median", "widget": "Select" }, "application/vnd.jupyter.widget-view+json": { "model_id": "9367b36a66f148d2acd012c4f1c4fd71", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Select" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "choices": [ "none", "lasso", "rfe", "pca", "boruta" ], "code_uid": "Select.0.40.16.38-randff2c0505", "disabled": false, "hidden": false, "label": "Feature Selection", "model_id": "d9e985281de24f87a5e3d20d79348999", "url_key": "", "value": "none", "widget": "Select" }, "application/vnd.jupyter.widget-view+json": { "model_id": "d9e985281de24f87a5e3d20d79348999", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Select" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "choices": [ "none", "smote", "undersampling", "rose" ], "code_uid": "Select.0.40.16.42-rand81ad2a30", "disabled": false, "hidden": false, "label": "Imbalance Treatment", "model_id": "368fdc5c1dfa46029723962befc161a1", "url_key": "", "value": "none", "widget": "Select" }, "application/vnd.jupyter.widget-view+json": { "model_id": "368fdc5c1dfa46029723962befc161a1", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Select" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/mercury+json": { "choices": [ "random_forest", "logistic_regression", "knn", "svm", "naive_bayes", "decision_tree", "xgboost" ], "code_uid": "Select.0.40.16.46-rand5302564f", "disabled": false, "hidden": false, "label": "Model Selection", "model_id": "ad9082261f9e49b387ff963824152a05", "url_key": "", "value": "random_forest", "widget": "Select" }, "application/vnd.jupyter.widget-view+json": { "model_id": "ad9082261f9e49b387ff963824152a05", "version_major": 2, "version_minor": 0 }, "text/plain": [ "mercury.Select" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# input train and test sets\n", "input_train_set = X_train\n", "input_test_set = X_test\n", "\n", "# Start widget section\n", "\n", "input_drop_duplicates = 'yes'\n", "input_missing_values_threshold = mr.Text(label=\"Missing Value Threeshold\", value='50')\n", "input_missing_values_threshold = int(input_missing_values_threshold.value)\n", "\n", "input_variance_threshold = mr.Text(label=\"Variance Threshold\", value='0.05') # \n", "input_variance_threshold = float(input_variance_threshold.value)\n", "\n", "input_correlation_threshold = mr.Text(label=\"Correlation Threshold\", value='0.95') # \n", "input_correlation_threshold = float(input_correlation_threshold.value)\n", "\n", "# input outlier removal variables\n", "\n", "input_outlier_action = mr.Select(label=\"Outlier Action\", value='ignore', choices=['ignore', 'remove', 'push']) # 'ignore', 'remove', 'push'\n", "input_outlier_action = str(input_outlier_action.value)\n", "\n", "input_outlier_removal_threshold = mr.Select(label=\"Outlier Action Threshold\", value='none', choices=['none', 3, 4, 5]) # 'none' or zscore from 0 to 100\n", "if input_outlier_removal_threshold.value != 'none':\n", " input_outlier_removal_threshold = int(input_outlier_removal_threshold.value)\n", "elif input_outlier_removal_threshold.value == 'none':\n", " input_outlier_removal_threshold = str(input_outlier_removal_threshold.value)\n", "\n", "# input scaling variables\n", "input_scale_model = mr.Select(label=\"Scaling Variables\", value=\"none\", choices=['none', 'standard', 'minmax', 'robust']) # 'none', 'normal', 'standard', 'minmax', 'robust'\n", "input_scale_model = str(input_scale_model.value)\n", "\n", "# input imputation variables\n", "input_imputation_method = mr.Select(label=\"Imputation Methods\", value=\"median\", choices=['mean', 'median', 'knn', 'most_frequent']) # 'mean', 'median', 'knn', 'most_frequent'\n", "input_n_neighbors = 5 # only for knn imputation\n", "input_imputation_method = str(input_imputation_method.value)\n", "\n", "# import feature selection variables\n", "input_feature_selection = mr.Select(label=\"Feature Selection\", value=\"none\", choices=['none', 'lasso', 'rfe', 'pca', 'boruta']) # 'none', 'lasso', 'rfe', 'pca', 'boruta'\n", "input_feature_selection = str(input_feature_selection.value)\n", "\n", "# input imbalance treatment variables\n", "input_imbalance_treatment = mr.Select(label=\"Imbalance Treatment\", value=\"none\", choices=['none', 'smote', 'undersampling', 'rose']) # 'none', 'smote', 'undersampling', 'rose'\n", "input_imbalance_treatment = str(input_imbalance_treatment.value)\n", "\n", "# input model\n", "input_model = mr.Select(label=\"Model Selection\", value=\"random_forest\", choices=['random_forest', 'logistic_regression', 'knn', 'svm', 'naive_bayes','decision_tree','xgboost'])\n", "input_model = str(input_model.value)\n", "\n", "# remove features using the function list_columns_to_drop\n", "\n", "dropped = columns_to_drop(input_train_set, input_drop_duplicates, input_missing_values_threshold, input_variance_threshold, input_correlation_threshold)\n", "\n", "# drop the columns from the training and testing sets and save the new sets as new variables\n", "\n", "X_train2 = input_train_set.drop(dropped, axis=1)\n", "X_test2 = input_test_set.drop(dropped, axis=1)\n", "\n", "\n", "# remove outliers from train dataset\n", "\n", "X_train_dropped_outliers = outlier_removal(X_train2, input_outlier_action, input_outlier_removal_threshold)\n", "\n", "# scale the training and testing sets\n", "\n", "X_train_scaled = scale_dataframe(input_scale_model, X_train_dropped_outliers, X_train_dropped_outliers)\n", "X_test_scaled = scale_dataframe(input_scale_model, X_train_dropped_outliers, X_test2)\n", "\n", "# impute the missing values in the training and testing sets using the function impute_missing_values\n", "\n", "X_train_imputed = impute_missing_values(input_imputation_method,X_train_scaled, X_train_scaled, input_n_neighbors)\n", "X_test_imputed = impute_missing_values(input_imputation_method,X_train_scaled, X_test_scaled, input_n_neighbors)\n", "\n", "# select features\n", "\n", "X_train_selected, selected_features = feature_selection(input_feature_selection, X_train_imputed, y_train)\n", "\n", "X_train_selected = pd.DataFrame(X_train_selected, columns=selected_features)\n", "X_test_selected = X_test_imputed[selected_features]\n", "\n", "# treat imbalance in the training set using the function oversample\n", "\n", "X_train_res, y_train_res = imbalance_treatment(input_imbalance_treatment, X_train_selected, y_train)\n", "\n", "# train the model using the function train_model and save the predictions as new variables\n", "\n", "y_pred_random_forest = train_model('random_forest', X_train_res, y_train_res, X_test_selected, y_test)\n", "y_pred_logistic_regression = train_model('logistic_regression', X_train_res, y_train_res, X_test_selected, y_test)\n", "y_pred_knn = train_model('knn', X_train_res, y_train_res, X_test_selected, y_test)\n", "y_pred_svm = train_model('svm', X_train_res, y_train_res, X_test_selected, y_test)\n", "y_pred_naive_bayes = train_model('naive_bayes', X_train_res, y_train_res, X_test_selected, y_test)\n", "y_pred_decision_tree = train_model('decision_tree', X_train_res, y_train_res, X_test_selected, y_test)\n", "y_pred_xgboost = train_model('xgboost', X_train_res, y_train_res, X_test_selected, y_test)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "evaluation_score_output, evaluation_counts_output = evaluate_models(input_model)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "#### **Confusion Matrix**" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Accuracy Precision Recall F1-score\n", "0 0.93 0.0 0.0 0.0\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAasAAAGwCAYAAAAXAEo1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8o6BhiAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAu+klEQVR4nO3deVxV1f7/8fcRBHHgOBAoiko5hIqK+BWxNM0EtTS/zd1yTuumpjnmr7R7G66mmTbprZy7fsu6TWbeyhxyhBLFEVGvA4ogCghiCAjn94d58ogkyMGzlNfz8eBRZ529F599HsKbtffae1lsNptNAAAYrIKrCwAA4GoIKwCA8QgrAIDxCCsAgPEIKwCA8QgrAIDxCCsAgPHcXV1AaRQUFOj48eOqVq2aLBaLq8sBAJSQzWbTmTNn5O/vrwoVih4/3dBhdfz4cQUEBLi6DABAKR09elT16tUr8v0bOqyqVasmSfJo1l8WNw8XVwOUjYS1b7q6BKDMnMnMVKPAAPvv86Lc0GF18dSfxc2DsMJNy9vb29UlAGXuapdymGABADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYVXODXn4Tv2ydKJOrJ+uE+una+2iMYq4o5nDNk0D/fT5rKeVvG66Uja8qZ8XjVFA7RoO24S1DNR/PhihU5tmKGndNP3w0UhV8qx4PQ8FKLUP5szW7Y0DVb1qJXVoF6oNG9a7uiT8zt3VBcC1Ek+c1qR3v9F/E05Jkp7sFabPZw5V+8emKu5gsgLr+WjV/NFa9PUmvTbnO2VkZev2wNo6l5Nn7yOsZaC+ee9ZvbngR41+43Plns9XyyZ1VVBgc9VhASX2+WdLNW7MKL397myFd7hDcz/6QH3u66GtO/aofv36ri6v3LPYbLYb9jdKZmamrFarPIOHyOLm4epybhqJa9/Q/5v1tRZ9vVmLpw5UXl6+Bk9aXOT2Py8ao1XRe/XK7O+uY5XlR/qv77m6hHKhY4cwhYS00Tvvz7G3tQ4OUq/effTq61NcWNnNLTMzU361rMrIyJC3t3eR23EaEHYVKlj0cGSoqnh5KHrHIVksFnW/s7n2J6Ro2fvDdGTVFK1bPFa9Ore073NLjapq1zJQJ9OytGbhaB3+6R/6ce5IdWh9qwuPBCiZ3Nxcbdsao67dIhzau94ToajNm1xUFS5FWEHNG/nr5MYZyoiepXdefFSPjvlIew8my7dmVVWrUkljB3bTyk171Ouv72nZmu36dMZTujO0kSQpsJ6PJOnFp3tq/pebdP+w2YqNO6oVH4zQbfVvceVhAcV26tQp5efny9fXz6Hdz89PJ04ku6gqXIprVtC+wycU9tgUVa9WWX26ttZHr/RVxFNvK+NMtiRp+dqdenfJGknSjn2JCmt1q4Y8dKc2xBxQhQoWSdK8Lzbo42VRkqTt8cfUuV1T9b8/XJPfXeaagwKugcVicXhts9kKtcE1GFlBeefzdfDoKW3dk6DJ7y7Tzn2JGvZ4Z51Kz1JeXr7iDiY5bB9/MNk+GzDpZKYkKe6g41+f8YeSC80YBEzl4+MjNze3QqOolJSUQqMtuAZhhUIsssjTw1155/MVs+eImjRw/GFt3MBXCUnpkqQjx1N1POW0mjT0ddimUQNfJSSlXbeagdLw8PBQSJtQrf5ppUP76lUr1T68g4uqwqU4DVjO/X14L/24cY+OJqerWpVKejgyVJ3aNlbvYbMlSTMX/aSP3xikDVsP6Oct+xTRoZl6dmqhyCFv2/uYuegnvfTMvdq5L1Hb44/pyV5hatrQT38ZN89VhwWU2HOjRmvwgL5qE9pWYe3DNW/uhzqakKCnhj7j6tIgwqrc861VTfNe66faPt7KyDqnXfsT1XvYbK2O3itJWrZmh0a8/qnGDYrQjPEPad+RFD0+bq42xR609/He/61VJc+KmjbmQdWwVtbOfYm676/v6dCxU646LKDEHn7kUaWlpuofr7+i5KQkNW/eQl9/u0INGjRwdWmQAfdZzZ49W9OnT1dSUpKaN2+uWbNmqWPHjsXal/usUB5wnxVuZjfEfVZLly7VqFGj9OKLL2rbtm3q2LGjevTooYSEBFeWBQAwjEvD6q233tLgwYP11FNPKSgoSLNmzVJAQIDmzJlz9Z0BAOWGy8IqNzdXMTExiohwvGM8IiJCmzZd+Y7xnJwcZWZmOnwBAG5+Lguri3eM+/kVvmM8OfnKd4xPmTJFVqvV/hUQEHA9SgUAuJjL77MqyR3jEydOVEZGhv3r6NGj16NEAICLuSysLt4xfvkoKiUlpdBo6yJPT095e3s7fKF4alqr6MiqKapfp6ZL62jeyF8Hvn9VlSsxexPOlZqaqvr+vjpy+LBL69i1c6dua1hPZ8+edWkdNxuXhZWHh4dCQ0O1cqXjHeMrV65Uhw7cMe5s4wZFaMW6nfanSrw57kFtXDJep6NnKurTF4rVh0dFd7014WEdXT1VpzbN0OeznlZd3+oO21Sv5qV5r/ZT8rrpSl43XfNe7SdrVS/7+7sPHNeWXUc04skuTjs2QJKmvzFFPe/tpQYNG0qSEhIS9GCfXqplraJ6tX00etRzys3N/dM+cnJy9PzIEapX20e1rFX00P/21rFjxxy2SU9P16D+feVXyyq/WlYN6t9Xp0+ftr/fIjhYbf+nnd59e6azD7Fcc+lpwNGjR2vu3LmaP3++4uLi9PzzzyshIUHPPMMd485UybOi+vcJ18KvNtvbLBaLFn8TpX//uLXY/Uwf96B6d2mpfhMXqOvAmarq5aEv3nnG/jBbSVo4ZYBaNq2n+4fP1v3DZ6tl03qa91o/h34WL4vS0Ic7OuwHlEZ2drYWLZinAYOekiTl5+frgd736uzZs1q1doMWL/lUX3/1hSaMG/On/YwbPUrLvvlKi5d8qlVrNygrK0sP3n+f8vPz7dsM6PsX7dgeq2+Wf69vln+vHdtjNXhAX4d++vUfqA8/mOOwH0rHpU+wePTRR5WamqpXXnlFSUlJatGihVas4I5xZ4u8o5nO5+cresche9uYaf+WJPnU6KkWjetetQ/vqpU0oE+4Br+0WGui4yVJg15arP3/eVV3h92unzbHqWmgnyLvaK5Ofafr111HJEnDXv0//bx4rBo38NX+IymSpJWb4lTTWkUdQxvr51/3OftwUQ798P1/5O7urvbh4ZKkn1b+qLi4Pdq/4qj8/f0lSVOnzdDQwQP091dfv+IlhIyMDC1cME/zFn6su7veI0mav+hfahwYoNWrflK3iEjtjYvTjz98r583RKldWJgk6f1/fqTOHcO1Lz5eTZo2lSR1i4hUWmqq1q/7WZ273H09PoKbnssnWDz77LM6fPiwcnJyFBMTo06dOrm6pJvOnW0aaeue0t1oHRJUXx4V3fXT5jh7W9LJDO3+73G1bxUo6cLy9qfP/GYPKkn6ZedhnT7zm9q3+mMxxrzz+dq5L1F3hNxWqpqAizasX6c2oW3tr6OjNqt58xb2oJIuBEhOTo62bY25Yh/btsYoLy9P91yyAKO/v7+aN29hX4AxOmqzrFarPagkKax9e1mtVodFGj08PBTcspU2bljvtGMs71weVih7DfxrKulkRqn6qF3LWzm5eTr9+xpXF6WknpFfrQt/pfrV8tbJtKxC+55My5Kfj+NfssdTTquBf61S1QRcdOTIYdWp80cwnUhOlu9lE7Vq1KghDw+PIm+NSU5OloeHh2rUcFzaxtfPTyd+3+fEiWTd4utbaN9bfH0LLS/iX7euyyd73EwIq3KgkqeHzuWcL5O+LRaLLn245JUeNWmxSLqsPTsnT5UrVSyTmlD+nMvOVqVKlRzarnQLzLUspnj5PkX1q8vavSp56bfs30r0vVA0wqocSD2dpRrelUvVR3Jqpjw9Kqp6NS+H9ltqVlVK6oUniZxIzZRvrWqF9vWpUVUnUs84tNWwVtap9MKjMOBa1Krlo/TT6fbXfrVr20dDF6WnpysvL6/IW2Nq166t3NxcpaenO7SfTEmxj9L8/Gor5cSJQvueOnlSfpct0pieniYfn1uu6XhQGGFVDmzfe0y331q7VH1si0tQbt55dW1/u72tto+3mt/mr6jtFyZuRO84pOrVKqtt8z8myPxPiwaqXq2yorYfdOiv+W3+io13nBIMXKtWISHau2eP/XVY+3Dt3r1LSUl/rHL908of5enpqZA2oVfsI6RNqCpWrKhVlyzAmJSUpN27d9kXYAxrH66MjAz9+ssv9m1+iY5WRkZGoUUad+/epdatQ5xyfCCsyoWVm+PU7NY6DqOiWwN81LJJXfn5eMvLs6JaNqmrlk3qqqK7myTJ/xarYr98yR48mVnntPDrzZo6+gF1btdErZrW0/zX+mvXgeP2ta/iD53QDxt36/3Jj6tdcEO1C26o9yf9Rd/9vNM+E1CS6tepKX9fq9b8vh9QWt26RWrPnt32UdE93SIUFNRMgwf0Vey2bVqzepUmThirgYOH2GcCJiYmqlWL2+3BY7VaNWDgYL0wfozWrF6l2G3bNKj/k2rRItg+O/D2oCBFRHbXsGeGKDoqStFRURr2zBD1vPc++0xASTpy+LCOJyaqy+/7ofRYfLEc2H3guLbGJejBiDaa98VGSdKcyU+oU9vG9m2il06UJDXtOVkJSWlyd3dT08Da8rrkSRPj3/xC+fkF+tcbg+XlWVFrfonX0JEfq6Dgj+tRA//fIs0Y/5C+nT1MkvTdzzv1/NTPHep5pEdb/bR5rxKSHE+3ANeqRXCw2oS21Reff6anhj4tNzc3fbnsO40a8azuvusOeXl56ZHH/qKp096073M+L0/74uOVfcl1pWkzZsrN3V1PPv6IsrOz1eXurvpw3kK5ubnZt1mweInGjHpOvXpemDV47329NfMdxzXHPlv6ie7pFsFtOE7k8sUXS4PFF4sv8s5mmvL8/yr0oX9ccRLE9eJR0V27vpms/hMXavNlpwZxZSy+WDzf/2eFJk4Yq5jYXapQwXUnjXJyctQiqLEWffyJOtxxh8vquFEUd/FFRlblxA8b9qhRgK/q+lp17MRpl9VRv05NvTHvB4IKTte9R08d2L9fiYmJLl2RIeHIEU144UWCyskYWQGGY2SFm9kNsaw9AADFQVgBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjEdYAQCMR1gBAIxHWAEAjOdenI2WLVtW7A579+59zcUAAHAlxQqrPn36FKszi8Wi/Pz80tQDAEAhxQqrgoKCsq4DAIAileqa1blz55xVBwAARSpxWOXn5+vVV19V3bp1VbVqVR08eFCSNGnSJM2bN8/pBQIAUOKwev3117Vw4UJNmzZNHh4e9vbg4GDNnTvXqcUBACBdQ1gtXrxYH374oZ544gm5ubnZ21u2bKm9e/c6tTgAAKRrCKvExEQ1atSoUHtBQYHy8vKcUhQAAJcqcVg1b95c69evL9T++eefKyQkxClFAQBwqWJNXb/Uyy+/rL59+yoxMVEFBQX68ssvFR8fr8WLF2v58uVlUSMAoJwr8ciqV69eWrp0qVasWCGLxaLJkycrLi5O3377rbp161YWNQIAyrkSj6wkKTIyUpGRkc6uBQCAK7qmsJKkLVu2KC4uThaLRUFBQQoNDXVmXQAA2JU4rI4dO6bHH39cGzduVPXq1SVJp0+fVocOHfTJJ58oICDA2TUCAMq5El+zGjRokPLy8hQXF6e0tDSlpaUpLi5ONptNgwcPLosaAQDlXIlHVuvXr9emTZvUtGlTe1vTpk317rvv6o477nBqcQAASNcwsqpfv/4Vb/49f/686tat65SiAAC4VInDatq0aRoxYoS2bNkim80m6cJki5EjR+rNN990eoEAABTrNGCNGjVksVjsr8+ePauwsDC5u1/Y/fz583J3d9egQYOKvVAjAADFVaywmjVrVhmXAQBA0YoVVv379y/rOgAAKNI13xQsSdnZ2YUmW3h7e5eqIAAALlfiCRZnz57V8OHD5evrq6pVq6pGjRoOXwAAOFuJw2r8+PFavXq1Zs+eLU9PT82dO1d///vf5e/vr8WLF5dFjQCAcq7EpwG//fZbLV68WJ07d9agQYPUsWNHNWrUSA0aNNCSJUv0xBNPlEWdAIByrMQjq7S0NAUGBkq6cH0qLS1NknTnnXdq3bp1zq0OAABdQ1jdeuutOnz4sCSpWbNm+uyzzyRdGHFdfLAtAADOVOKwGjhwoLZv3y5Jmjhxov3a1fPPP69x48Y5vUAAAEp8zer555+3/3+XLl20d+9ebdmyRbfddptatWrl1OIAAJBKeZ+VdOHBtvXr13dGLQAAXFGxwuqdd94pdofPPffcNRcDAMCVFCusZs6cWazOLBYLYQUAcLpihdWhQ4fKug4AAIpU4tmAAABcb4QVAMB4hBUAwHiEFQDAeIQVAMB41xRW69ev15NPPqnw8HAlJiZKkj7++GNt2LDBqcUBACBdQ1h98cUXioyMlJeXl7Zt26acnBxJ0pkzZ/SPf/zD6QUCAFDisHrttdf0z3/+Ux999JEqVqxob+/QoYO2bt3q1OIAAJCuIazi4+PVqVOnQu3e3t46ffq0M2oCAMBBicOqTp06OnDgQKH2DRs26NZbb3VKUQAAXKrEYfX0009r5MiRio6OlsVi0fHjx7VkyRKNHTtWzz77bFnUCAAo50q8RMj48eOVkZGhLl266Ny5c+rUqZM8PT01duxYDR8+vCxqBACUcxabzWa7lh1/++037dmzRwUFBWrWrJmqVq3q7NquKjMzU1arVZ7BQ2Rx87ju3x+4HtJ/fc/VJQBlJjMzU361rMrIyJC3t3eR213z4ouVK1dW27Ztr3V3AACKrcRh1aVLF1ksliLfX716dakKAgDgciUOq9atWzu8zsvLU2xsrHbt2qX+/fs7qy4AAOxKHFZFrRr8t7/9TVlZWaUuCACAyzntQbZPPvmk5s+f76zuAACwc1pYbd68WZUqVXJWdwAA2JX4NOADDzzg8NpmsykpKUlbtmzRpEmTnFYYAAAXlTisrFarw+sKFSqoadOmeuWVVxQREeG0wgAAuKhEYZWfn68BAwYoODhYNWvWLKuaAABwUKJrVm5uboqMjFRGRkZZ1QMAQCElnmARHBysgwcPlkUtAABcUYnD6vXXX9fYsWO1fPlyJSUlKTMz0+ELAABnK/EEi+7du0uSevfu7fDYJZvNJovFovz8fOdVBwCAriGs1qxZUxZ1AABQpBKHVWBgoAICAgo9zNZms+no0aNOKwwAgItKfM0qMDBQJ0+eLNSelpamwMBApxQFAMClShxWF69NXS4rK4vHLQEAykSxTwOOHj1akmSxWDRp0iRVrlzZ/l5+fr6io6MLLR8CAIAzFDustm3bJunCyGrnzp3y8PhjGXkPDw+1atVKY8eOdX6FAIByr9hhdXEW4MCBA/X222/L29u7zIoCAOBSJZ4NuGDBgrKoAwCAIjltPSsAAMoKYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYQUAMB5hBQAwHmEFADAeYYVCxg6K0IZ/jVPKhjd1ZNUUffbWEDVu4Ftou6aBfvp81tNKXjddKRve1M+Lxiigdg0XVAw4xwdzZuv2xoGqXrWSOrQL1YYN611dEn5HWKGQjm0a6Z9L1+mufm/qvr++Jzc3Ny2fM1yVK3nYtwms56NV80dr36FkRQ55W+0enaIpH32vczl5LqwcuHaff7ZU48aM0oQXXlTUr9vU4c6O6nNfDyUkJLi6NEiy2Gw2m6uLuFaZmZmyWq3yDB4ii5vH1XfANfGpUVVHV0/VPYNnauPW/0qSFk8dqLy8fA2etNjF1d380n99z9UllAsdO4QpJKSN3nl/jr2tdXCQevXuo1dfn+LCym5umZmZ8qtlVUZGhry9vYvcjpEVrsq7aiVJUnrGb5Iki8Wi7nc21/6EFC17f5iOrJqidYvHqlfnlq4sE7hmubm52rY1Rl27RTi0d70nQlGbN7moKlyKsMJVvTHmQW3cekB7/pskSfKtWVXVqlTS2IHdtHLTHvX663tatma7Pp3xlO4MbeTiaoGSO3XqlPLz8+Xr6+fQ7ufnpxMnkl1UFS7l7uoCYLaZLzyi4Mb+6jpwpr2tQoULf+MsX7tT7y5ZI0nasS9RYa1u1ZCH7tSGmAMuqRUoLYvF4vDaZrMVaoNrMLJCkd6a8LDuuytYkUPeUWLKaXv7qfQs5eXlK+5gksP28QeTmQ2IG5KPj4/c3NwKjaJSUlIKjbbgGoQVrmjmhId1/92t1P3pd3TkeKrDe3nn8xWz54iaNHD8IW7cwFcJSenXs0zAKTw8PBTSJlSrf1rp0L561Uq1D+/goqpwKU4DopBZEx/Roz3a6uHnP1TW2XPyq1VNkpSRdc4+NX3mop/08RuDtGHrAf28ZZ8iOjRTz04tFDnkbVeWDlyz50aN1uABfdUmtK3C2odr3twPdTQhQU8NfcbVpUFMXccVZG+78lTpIZM/1r++jba/7nd/e40bFKG6vtW170iKXvvnd1q+duf1KrPcYOr69fPBnNl6a8Y0JSclqXnzFpo2Y6bu7NjJ1WXd1Io7dd2lYbVu3TpNnz5dMTExSkpK0ldffaU+ffoUe3/CCuUBYYWb2Q1xn9XZs2fVqlUrvfceP4wAgKK59JpVjx491KNHj2Jvn5OTo5ycHPvrzMzMsigLAGCYG2o24JQpU2S1Wu1fAQEBri4JAHAd3FBhNXHiRGVkZNi/jh496uqSAADXwQ01dd3T01Oenp6uLgMAcJ3dUCMrXLua1io6smqK6tep6dI6mjfy14HvX3VYbgRwhtTUVNX399WRw4ddWseunTt1W8N6Onv2rEvruNkQVuXEuEERWrFupxKS0iRJb457UBuXjNfp6JmK+vSFYvXhUdFdb014WEdXT9WpTTP0+aynVde3usM21at5ad6r/ZS8brqS103XvFf7yVrVy/7+7gPHtWXXEY14sovTjg2QpOlvTFHPe3upQcOGkqSEhAQ92KeXalmrqF5tH40e9Zxyc3P/tI+cnBw9P3KE6tX2US1rFT30v7117Ngxh23S09M1qH9f+dWyyq+WVYP699Xp06ft77cIDlbb/2mnd9+eKTiPS8MqKytLsbGxio2NlSQdOnRIsbGxLHbmZJU8K6p/n3At/Gqzvc1isWjxN1H6949bi93P9HEPqneXluo3cYG6Dpypql4e+uKdZ1Shwh8P+lw4ZYBaNq2n+4fP1v3DZ6tl03qa91o/h34WL4vS0Ic7OuwHlEZ2drYWLZinAYOekiTl5+frgd736uzZs1q1doMWL/lUX3/1hSaMG/On/YwbPUrLvvlKi5d8qlVrNygrK0sP3n+f8vPz7dsM6PsX7dgeq2+Wf69vln+vHdtjNXhAX4d++vUfqA8/mOOwH0rHpWG1ZcsWhYSEKCQkRJI0evRohYSEaPLkya4s66YTeUcznc/PV/SOQ/a2MdP+rQ8+W6dDx1L/ZM8/eFetpAF9wvXCW19pTXS8tscf06CXFqtFI3/dHXa7pAvL3Efe0VzPvrJE0TsOKXrHIQ179f90713BatzA197Xyk1xqmmtoo6hjZ17oCi3fvj+P3J3d1f78HBJ0k8rf1Rc3B7NX/QvtQ4J0d1d79HUaTO0YN5HRd7ykpGRoYUL5mnqtBm6u+s9ah0SovmL/qVdu3Zq9aqfJEl74+L04w/fa/YHc9U+PFztw8P1/j8/0orvlmtffLy9r24RkUpLTdX6dT+X/cGXEy4Nq86dO8tmsxX6WrhwoSvLuunc2aaRtu4p3Wg1JKi+PCq666fNcfa2pJMZ2v3f42rfKlCSFNYyUKfP/KZfdx2xb/PLzsM6feY3tW91q70t73y+du5L1B0ht5WqJuCiDevXqU1oW/vr6KjNat68hfz9/e1t3SIilZOTo21bY67Yx7atMcrLy9M9lyzA6O/vr+bNW9gXYIyO2iyr1ap2YWH2bcLat5fVanVYpNHDw0PBLVtp44b1TjvG8o5rVuVAA/+aSjqZUao+atfyVk5unk6fyXZoT0k9I79aFx6R4lfLWyfTsgrtezItS34+jo9ROZ5yWg38a5WqJuCiI0cOq06dP4LpRHKyfP0cVwWoUaOGPDw8lJx85cUUk5OT5eHhoRo1HJe58fXz04nf9zlxIlm3+PoW2vcWX99Cy4v4163r8skeNxPCqhyo5Omhcznny6Rvi8WiSx8ueaVHTVoski5rz87JU+VKFcukJpQ/57KzValSJYe2Ky2aeC2LKV6+T1H96rJ2r0pe+i37txJ9LxSNsCoHUk9nqYZ35VL1kZyaKU+Piqpezcuh/ZaaVZWSeuEawInUTPn+vpzIpXxqVNWJ1DMObTWslXUqvfAoDLgWtWr5KP30H2up+dWubR8NXZSenq68vDz5+V15McXatWsrNzdX6emOa7KdTEmxj9L8/Gor5cSJQvueOnlSfpct0pieniYfn1uu6XhQGGFVDmzfe0y331q7VH1si0tQbt55dW1/u72tto+3mt/mr6jtFyZuRO84pOrVKqtt8wb2bf6nRQNVr1ZZUdsPOvTX/DZ/xcY7TgkGrlWrkBDt3bPH/jqsfbh2796lpKQ/VrP+aeWP8vT0VEib0Cv2EdImVBUrVtSqSxZgTEpK0u7du+wLMIa1D1dGRoZ+/eUX+za/REcrIyOj0CKNu3fvUuvWIU45PhBW5cLKzXFqdmsdh1HRrQE+atmkrvx8vOXlWVEtm9RVyyZ1VdHdTZLkf4tVsV++ZA+ezKxzWvj1Zk0d/YA6t2uiVk3raf5r/bXrwHGtjt4rSYo/dEI/bNyt9yc/rnbBDdUuuKHen/QXfffzTu0/kmL/3vXr1JS/r1Vrft8PKK1u3SK1Z89u+6jonm4RCgpqpsED+ip22zatWb1KEyeM1cDBQ+zLUCQmJqpVi9vtwWO1WjVg4GC9MH6M1qxepdht2zSo/5Nq0SJYd3e9R5J0e1CQIiK7a9gzQxQdFaXoqCgNe2aIet57n5o0bWqv58jhwzqemKguv++H0ruhHreEa7P7wHFtjUvQgxFtNO+LjZKkOZOfUKe2f0wdj146UZLUtOdkJSSlyd3dTU0Da8vrkidNjH/zC+XnF+hfbwyWl2dFrfklXkNHfqyCgj+uRw38f4s0Y/xD+nb2MEnSdz/v1PNTP3eo55EebfXT5r1KSHI83QJcqxbBwWoT2lZffP6Znhr6tNzc3PTlsu80asSzuvuuO+Tl5aVHHvuLpk57077P+bw87YuPV/Yl15WmzZgpN3d3Pfn4I8rOzlaXu7vqw3kL5ebmZt9mweIlGjPqOfXqeWHW4L339dbMdxyXOfps6Se6p1uEGjRoIDgHKwWXE5F3NtOU5/9XoQ/944qTIK4Xj4ru2vXNZPWfuFCbLzs1iCtj8cXi+f4/KzRxwljFxO5ShQquO2mUk5OjFkGNtejjT9ThjjtcVseNoriLLzKyKid+2LBHjQJ8VdfXqmMnTrusjvp1auqNeT8QVHC67j166sD+/UpMTHTp8kEJR45owgsvElROxsgKMBwjK9zMbohl7QEAKA7CCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8d1cXUBo2m+3Cf/NzXVwJUHYyMzNdXQJQZs78/u/74u/zolhsV9vCYMeOHVNAQICrywAAlNLRo0dVr169It+/ocOqoKBAx48fV7Vq1WSxWFxdTrmQmZmpgIAAHT16VN7e3q4uB3Aq/n1ffzabTWfOnJG/v78qVCj6ytQNfRqwQoUKf5rEKDve3t78MOOmxb/v68tqtV51GyZYAACMR1gBAIxHWKFEPD099fLLL8vT09PVpQBOx79vc93QEywAAOUDIysAgPEIKwCA8QgrAIDxCCsAgPEIKxTb7NmzFRgYqEqVKik0NFTr1693dUmAU6xbt069evWSv7+/LBaLvv76a1eXhMsQViiWpUuXatSoUXrxxRe1bds2dezYUT169FBCQoKrSwNK7ezZs2rVqpXee+89V5eCIjB1HcUSFhamNm3aaM6cOfa2oKAg9enTR1OmTHFhZYBzWSwWffXVV+rTp4+rS8ElGFnhqnJzcxUTE6OIiAiH9oiICG3atMlFVQEoTwgrXNWpU6eUn58vPz8/h3Y/Pz8lJye7qCoA5QlhhWK7fBkWm83G0iwArgvCClfl4+MjNze3QqOolJSUQqMtACgLhBWuysPDQ6GhoVq5cqVD+8qVK9WhQwcXVQWgPLmhF1/E9TN69Gj17dtXbdu2VXh4uD788EMlJCTomWeecXVpQKllZWXpwIED9teHDh1SbGysatasqfr167uwMlzE1HUU2+zZszVt2jQlJSWpRYsWmjlzpjp16uTqsoBSW7t2rbp06VKovX///lq4cOH1LwiFEFYAAONxzQoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKAGA8wgoAYDzCCgBgPMIKcIKGDRtq1qxZ9teuWhr9b3/7m1q3bl3k+2vXrpXFYtHp06eL3Wfnzp01atSoUtW1cOFCVa9evVR9oHwjrIAykJSUpB49ehRr26sFDAAeZAvY5ebmysPDwyl91a5d2yn9ALiAkRVuSp07d9bw4cM1fPhwVa9eXbVq1dJLL72kSx+F2bBhQ7322msaMGCArFarhgwZIknatGmTOnXqJC8vLwUEBOi5557T2bNn7fulpKSoV69e8vLyUmBgoJYsWVLo+19+GvDYsWN67LHHVLNmTVWpUkVt27ZVdHS0Fi5cqL///e/avn27LBaLLBaL/cGpGRkZGjp0qHx9feXt7a27775b27dvd/g+U6dOlZ+fn6pVq6bBgwfr3LlzJfqcUlNT9fjjj6tevXqqXLmygoOD9cknnxTa7vz583/6Webm5mr8+PGqW7euqlSporCwMK1du7ZEtQB/hrDCTWvRokVyd3dXdHS03nnnHc2cOVNz58512Gb69Olq0aKFYmJiNGnSJO3cuVORkZF64IEHtGPHDi1dulQbNmzQ8OHD7fsMGDBAhw8f1urVq/Xvf/9bs2fPVkpKSpF1ZGVl6a677tLx48e1bNkybd++XePHj1dBQYEeffRRjRkzRs2bN1dSUpKSkpL06KOPymaz6d5771VycrJWrFihmJgYtWnTRl27dlVaWpok6bPPPtPLL7+s119/XVu2bFGdOnU0e/bsEn1G586dU2hoqJYvX65du3Zp6NCh6tu3r6Kjo0v0WQ4cOFAbN27Up59+qh07dujhhx9W9+7dtX///hLVAxTJBtyE7rrrLltQUJCtoKDA3jZhwgRbUFCQ/XWDBg1sffr0cdivb9++tqFDhzq0rV+/3lahQgVbdna2LT4+3ibJFhUVZX8/Li7OJsk2c+ZMe5sk21dffWWz2Wy2Dz74wFatWjVbamrqFWt9+eWXba1atXJoW7Vqlc3b29t27tw5h/bbbrvN9sEHH9hsNpstPDzc9swzzzi8HxYWVqivS61Zs8YmyZaenl7kNj179rSNGTPG/vpqn+WBAwdsFovFlpiY6NBP165dbRMnTrTZbDbbggULbFartcjvCVwN16xw02rfvr0sFov9dXh4uGbMmKH8/Hy5ublJktq2beuwT0xMjA4cOOBwas9ms6mgoECHDh3Svn375O7u7rDf7bff/qcz3WJjYxUSEqKaNWsWu/aYmBhlZWWpVq1aDu3Z2dn673//K0mKi4srtPhleHi41qxZU+zvk5+fr6lTp2rp0qVKTExUTk6OcnJyVKVKFYft/uyz3Lp1q2w2m5o0aeKwT05OTqH6gWtFWKFcu/yXckFBgZ5++mk999xzhbatX7++4uPjJcnhF/fVeHl5lbiugoIC1alT54rXfZw5BXzGjBmaOXOmZs2apeDgYFWpUkWjRo1Sbm5uiWp1c3NTTEyM/Y+Ai6pWreq0WlG+EVa4aUVFRRV63bhx40K/UC/Vpk0b7d69W40aNbri+0FBQTp//ry2bNmidu3aSZLi4+P/9L6lli1bau7cuUpLS7vi6MrDw0P5+fmF6khOTpa7u7saNmxYZC1RUVHq16+fwzGWxPr163X//ffrySeflHQhePbv36+goCCH7f7sswwJCVF+fr5SUlLUsWPHEn1/oLiYYIGb1tGjRzV69GjFx8frk08+0bvvvquRI0f+6T4TJkzQ5s2bNWzYMMXGxmr//v1atmyZRowYIUlq2rSpunfvriFDhig6OloxMTF66qmn/nT09Pjjj6t27drq06ePNm7cqIMHD+qLL77Q5s2bJV2YlXjo0CHFxsbq1KlTysnJ0T333KPw8HD16dNHP/zwgw4fPqxNmzbppZde0pYtWyRJI0eO1Pz58zV//nzt27dPL7/8snbv3l2iz6hRo0ZauXKlNm3apLi4OD399NNKTk4u0WfZpEkTPfHEE+rXr5++/PJLHTp0SL/++qveeOMNrVixokT1AEUhrHDT6tevn7Kzs9WuXTsNGzZMI0aM0NChQ/90n5YtW+rnn3/W/v371bFjR4WEhGjSpEmqU6eOfZsFCxYoICBAd911lx544AH79PKieHh46Mcff5Svr6969uyp4OBgTZ061T7Ce/DBB9W9e3d16dJFt9xyiz755BNZLBatWLFCnTp10qBBg9SkSRM99thjOnz4sPz8/CRJjz76qCZPnqwJEyYoNDRUR44c0V//+tcSfUaTJk1SmzZtFBkZqc6dO9tDtaSf5YIFC9SvXz+NGTNGTZs2Ve/evRUdHa2AgIAS1QMUxWKzXXKzBHCT6Ny5s1q3bu3wCCQANy5GVgAA4xFWAADjcRoQAGA8RlYAAOMRVgAA4xFWAADjEVYAAOMRVgAA4xFWAADjEVYAAOMRVgAA4/1/adZTfi1b9y0AAAAASUVORK5CYII=", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# create a np.array with selected_model values\n", "\n", "conf_matrix = np.array([[evaluation_counts_output['True Negatives'].values[0], evaluation_counts_output['False Positives'].values[0]],\n", " [evaluation_counts_output['False Negatives'].values[0], evaluation_counts_output['True Positives'].values[0]]])\n", "\n", "fig, ax = plot_confusion_matrix(\n", " conf_mat=conf_matrix,\n", " show_absolute=True,\n", " show_normed=True\n", ")\n", "\n", "print(evaluation_score_output[['Accuracy', 'Precision', 'Recall', 'F1-score']])\n", "plt.show()" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "skip" } }, "source": [ "### **Transformations Report**" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "FEATURE REMOVAL\n", "('the number of columns dropped due to duplications is: ', 104)\n", "('the number of columns dropped due to missing values is: ', 28)\n", "('the number of columns dropped due to low variance is: ', 189)\n", "('the number of columns dropped due to high correlation is: ', 90)\n", "('Total number of columns to be dropped is: ', 411)\n", "('New shape of the dataframe is: ', (1175, 179))\n", "------------------------------------------\n", "OUTLIER REMOVAL\n", "No z-score threshold was selected\n", "No outliers were removed from the dataset\n", "------------------------------------------\n", "SCALING\n", "The dataframe has not been scaled\n", "------------------------------------------\n", "IMPUTATION\n", "('Number of missing values before imputation: ', 1196)\n", "median imputation has been applied\n", "('Number of missing values after imputation: ', 0)\n", "------------------------------------------\n", "FEATURE SELECTION\n", "No feature selection has been applied\n", "('Shape of the training set after no feature selection: ', (1175, 179))\n", "------------------------------------------\n", "IMBALANCE TREATMENT\n", "('Shape of the training set after no resampling: ', (1175, 179))\n", "Value counts of the target variable after no resampling: \n", "pass/fail\n", "0 1097\n", "1 78\n", "dtype: int64\n" ] } ], "source": [ "print('FEATURE REMOVAL')\n", "print(feature_removal_report1)\n", "print(feature_removal_report2)\n", "print(feature_removal_report3)\n", "print(feature_removal_report4)\n", "print(feature_removal_report5)\n", "print(feature_removal_report6)\n", "print('------------------------------------------')\n", "print('OUTLIER REMOVAL')\n", "print(outlier_removal_report0)\n", "print(outlier_removal_report1)\n", "print('------------------------------------------')\n", "print('SCALING')\n", "print(scaling_report0)\n", "print('------------------------------------------')\n", "print('IMPUTATION')\n", "print(imputation_report0)\n", "print(imputation_report1)\n", "print(imputation_report2)\n", "print('------------------------------------------')\n", "print('FEATURE SELECTION')\n", "print(feature_selection_report0)\n", "print(feature_selection_report1)\n", "print('------------------------------------------')\n", "print('IMBALANCE TREATMENT')\n", "print(imbalance_report0)\n", "print(imbalance_report1)\n", "print(imbalance_report2)" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }