Upload P2 - Secom Notebook - Mercury.ipynb
Browse files- P2 - Secom Notebook - Mercury.ipynb +147 -43
P2 - Secom Notebook - Mercury.ipynb
CHANGED
@@ -26,7 +26,7 @@
|
|
26 |
},
|
27 |
{
|
28 |
"cell_type": "code",
|
29 |
-
"execution_count":
|
30 |
"metadata": {
|
31 |
"slideshow": {
|
32 |
"slide_type": "skip"
|
@@ -53,7 +53,7 @@
|
|
53 |
},
|
54 |
{
|
55 |
"cell_type": "code",
|
56 |
-
"execution_count":
|
57 |
"metadata": {
|
58 |
"slideshow": {
|
59 |
"slide_type": "skip"
|
@@ -64,7 +64,7 @@
|
|
64 |
"data": {
|
65 |
"application/mercury+json": {
|
66 |
"allow_download": true,
|
67 |
-
"code_uid": "App.0.40.24.1-
|
68 |
"continuous_update": false,
|
69 |
"description": "Recumpute everything dynamically",
|
70 |
"full_screen": true,
|
@@ -96,7 +96,7 @@
|
|
96 |
},
|
97 |
{
|
98 |
"cell_type": "code",
|
99 |
-
"execution_count":
|
100 |
"metadata": {
|
101 |
"slideshow": {
|
102 |
"slide_type": "skip"
|
@@ -104,6 +104,7 @@
|
|
104 |
},
|
105 |
"outputs": [],
|
106 |
"source": [
|
|
|
107 |
"# Read the features data from the the url of csv into pandas dataframes and rename the columns to F1, F2, F3, etc.\n",
|
108 |
"# Read the labels data from the url of csv into pandas dataframes and rename the columns to pass/fail and date/time\n",
|
109 |
"\n",
|
@@ -137,7 +138,7 @@
|
|
137 |
},
|
138 |
{
|
139 |
"cell_type": "code",
|
140 |
-
"execution_count":
|
141 |
"metadata": {
|
142 |
"slideshow": {
|
143 |
"slide_type": "skip"
|
@@ -194,7 +195,7 @@
|
|
194 |
},
|
195 |
{
|
196 |
"cell_type": "code",
|
197 |
-
"execution_count":
|
198 |
"metadata": {
|
199 |
"slideshow": {
|
200 |
"slide_type": "skip"
|
@@ -289,7 +290,7 @@
|
|
289 |
},
|
290 |
{
|
291 |
"cell_type": "code",
|
292 |
-
"execution_count":
|
293 |
"metadata": {
|
294 |
"slideshow": {
|
295 |
"slide_type": "skip"
|
@@ -340,7 +341,7 @@
|
|
340 |
},
|
341 |
{
|
342 |
"cell_type": "code",
|
343 |
-
"execution_count":
|
344 |
"metadata": {
|
345 |
"slideshow": {
|
346 |
"slide_type": "skip"
|
@@ -418,7 +419,7 @@
|
|
418 |
},
|
419 |
{
|
420 |
"cell_type": "code",
|
421 |
-
"execution_count":
|
422 |
"metadata": {
|
423 |
"slideshow": {
|
424 |
"slide_type": "skip"
|
@@ -484,6 +485,74 @@
|
|
484 |
" return df_imputed\n"
|
485 |
]
|
486 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
487 |
{
|
488 |
"attachments": {},
|
489 |
"cell_type": "markdown",
|
@@ -498,7 +567,7 @@
|
|
498 |
},
|
499 |
{
|
500 |
"cell_type": "code",
|
501 |
-
"execution_count":
|
502 |
"metadata": {
|
503 |
"slideshow": {
|
504 |
"slide_type": "skip"
|
@@ -569,7 +638,7 @@
|
|
569 |
},
|
570 |
{
|
571 |
"cell_type": "code",
|
572 |
-
"execution_count":
|
573 |
"metadata": {
|
574 |
"slideshow": {
|
575 |
"slide_type": "skip"
|
@@ -658,7 +727,7 @@
|
|
658 |
},
|
659 |
{
|
660 |
"cell_type": "code",
|
661 |
-
"execution_count":
|
662 |
"metadata": {
|
663 |
"slideshow": {
|
664 |
"slide_type": "skip"
|
@@ -747,7 +816,7 @@
|
|
747 |
},
|
748 |
{
|
749 |
"cell_type": "code",
|
750 |
-
"execution_count":
|
751 |
"metadata": {
|
752 |
"slideshow": {
|
753 |
"slide_type": "skip"
|
@@ -761,17 +830,17 @@
|
|
761 |
"yes",
|
762 |
"no"
|
763 |
],
|
764 |
-
"code_uid": "Select.0.40.16.25-
|
765 |
"disabled": false,
|
766 |
"hidden": false,
|
767 |
"label": "Drop Duplicates",
|
768 |
-
"model_id": "
|
769 |
"url_key": "",
|
770 |
"value": "yes",
|
771 |
"widget": "Select"
|
772 |
},
|
773 |
"application/vnd.jupyter.widget-view+json": {
|
774 |
-
"model_id": "
|
775 |
"version_major": 2,
|
776 |
"version_minor": 0
|
777 |
},
|
@@ -785,18 +854,18 @@
|
|
785 |
{
|
786 |
"data": {
|
787 |
"application/mercury+json": {
|
788 |
-
"code_uid": "Text.0.40.15.28-
|
789 |
"disabled": false,
|
790 |
"hidden": false,
|
791 |
"label": "Missing Value Threeshold",
|
792 |
-
"model_id": "
|
793 |
"rows": 1,
|
794 |
"url_key": "",
|
795 |
"value": "80",
|
796 |
"widget": "Text"
|
797 |
},
|
798 |
"application/vnd.jupyter.widget-view+json": {
|
799 |
-
"model_id": "
|
800 |
"version_major": 2,
|
801 |
"version_minor": 0
|
802 |
},
|
@@ -810,18 +879,18 @@
|
|
810 |
{
|
811 |
"data": {
|
812 |
"application/mercury+json": {
|
813 |
-
"code_uid": "Text.0.40.15.31-
|
814 |
"disabled": false,
|
815 |
"hidden": false,
|
816 |
"label": "Variance Threshold",
|
817 |
-
"model_id": "
|
818 |
"rows": 1,
|
819 |
"url_key": "",
|
820 |
"value": "0",
|
821 |
"widget": "Text"
|
822 |
},
|
823 |
"application/vnd.jupyter.widget-view+json": {
|
824 |
-
"model_id": "
|
825 |
"version_major": 2,
|
826 |
"version_minor": 0
|
827 |
},
|
@@ -835,18 +904,18 @@
|
|
835 |
{
|
836 |
"data": {
|
837 |
"application/mercury+json": {
|
838 |
-
"code_uid": "Text.0.40.15.34-
|
839 |
"disabled": false,
|
840 |
"hidden": false,
|
841 |
"label": "Correlation Threshold",
|
842 |
-
"model_id": "
|
843 |
"rows": 1,
|
844 |
"url_key": "",
|
845 |
"value": "1",
|
846 |
"widget": "Text"
|
847 |
},
|
848 |
"application/vnd.jupyter.widget-view+json": {
|
849 |
-
"model_id": "
|
850 |
"version_major": 2,
|
851 |
"version_minor": 0
|
852 |
},
|
@@ -866,17 +935,17 @@
|
|
866 |
4,
|
867 |
5
|
868 |
],
|
869 |
-
"code_uid": "Select.0.40.16.38-
|
870 |
"disabled": false,
|
871 |
"hidden": false,
|
872 |
"label": "Outlier Removal Threshold",
|
873 |
-
"model_id": "
|
874 |
"url_key": "",
|
875 |
"value": "none",
|
876 |
"widget": "Select"
|
877 |
},
|
878 |
"application/vnd.jupyter.widget-view+json": {
|
879 |
-
"model_id": "
|
880 |
"version_major": 2,
|
881 |
"version_minor": 0
|
882 |
},
|
@@ -897,17 +966,17 @@
|
|
897 |
"minmax",
|
898 |
"robust"
|
899 |
],
|
900 |
-
"code_uid": "Select.0.40.16.46-
|
901 |
"disabled": false,
|
902 |
"hidden": false,
|
903 |
"label": "Scaling Variables",
|
904 |
-
"model_id": "
|
905 |
"url_key": "",
|
906 |
"value": "none",
|
907 |
"widget": "Select"
|
908 |
},
|
909 |
"application/vnd.jupyter.widget-view+json": {
|
910 |
-
"model_id": "
|
911 |
"version_major": 2,
|
912 |
"version_minor": 0
|
913 |
},
|
@@ -927,17 +996,48 @@
|
|
927 |
"knn",
|
928 |
"most_frequent"
|
929 |
],
|
930 |
-
"code_uid": "Select.0.40.16.50-
|
931 |
"disabled": false,
|
932 |
"hidden": false,
|
933 |
"label": "Imputation Methods",
|
934 |
-
"model_id": "
|
935 |
"url_key": "",
|
936 |
"value": "mean",
|
937 |
"widget": "Select"
|
938 |
},
|
939 |
"application/vnd.jupyter.widget-view+json": {
|
940 |
-
"model_id": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
941 |
"version_major": 2,
|
942 |
"version_minor": 0
|
943 |
},
|
@@ -957,17 +1057,17 @@
|
|
957 |
"undersampling",
|
958 |
"rose"
|
959 |
],
|
960 |
-
"code_uid": "Select.0.40.16.
|
961 |
"disabled": false,
|
962 |
"hidden": false,
|
963 |
"label": "Imbalance Treatment",
|
964 |
-
"model_id": "
|
965 |
"url_key": "",
|
966 |
"value": "none",
|
967 |
"widget": "Select"
|
968 |
},
|
969 |
"application/vnd.jupyter.widget-view+json": {
|
970 |
-
"model_id": "
|
971 |
"version_major": 2,
|
972 |
"version_minor": 0
|
973 |
},
|
@@ -990,17 +1090,17 @@
|
|
990 |
"decision_tree",
|
991 |
"xgboost"
|
992 |
],
|
993 |
-
"code_uid": "Select.0.40.16.
|
994 |
"disabled": false,
|
995 |
"hidden": false,
|
996 |
"label": "Model Selection",
|
997 |
-
"model_id": "
|
998 |
"url_key": "",
|
999 |
"value": "random_forest",
|
1000 |
"widget": "Select"
|
1001 |
},
|
1002 |
"application/vnd.jupyter.widget-view+json": {
|
1003 |
-
"model_id": "
|
1004 |
"version_major": 2,
|
1005 |
"version_minor": 0
|
1006 |
},
|
@@ -1067,6 +1167,10 @@
|
|
1067 |
"input_n_neighbors = 5 # only for knn imputation\n",
|
1068 |
"input_imputation_method = str(input_imputation_method.value)\n",
|
1069 |
"\n",
|
|
|
|
|
|
|
|
|
1070 |
"# input imbalance treatment variables\n",
|
1071 |
"input_imbalance_treatment = mr.Select(label=\"Imbalance Treatment\", value=\"none\", choices=['none', 'smote', 'undersampling', 'rose']) # 'none', 'smote', 'undersampling', 'rose'\n",
|
1072 |
"input_imbalance_treatment = str(input_imbalance_treatment.value)\n",
|
@@ -1104,7 +1208,7 @@
|
|
1104 |
},
|
1105 |
{
|
1106 |
"cell_type": "code",
|
1107 |
-
"execution_count":
|
1108 |
"metadata": {
|
1109 |
"slideshow": {
|
1110 |
"slide_type": "skip"
|
@@ -1182,7 +1286,7 @@
|
|
1182 |
},
|
1183 |
{
|
1184 |
"cell_type": "code",
|
1185 |
-
"execution_count":
|
1186 |
"metadata": {
|
1187 |
"slideshow": {
|
1188 |
"slide_type": "skip"
|
@@ -1220,7 +1324,7 @@
|
|
1220 |
},
|
1221 |
{
|
1222 |
"cell_type": "code",
|
1223 |
-
"execution_count":
|
1224 |
"metadata": {
|
1225 |
"slideshow": {
|
1226 |
"slide_type": "slide"
|
|
|
26 |
},
|
27 |
{
|
28 |
"cell_type": "code",
|
29 |
+
"execution_count": 42,
|
30 |
"metadata": {
|
31 |
"slideshow": {
|
32 |
"slide_type": "skip"
|
|
|
53 |
},
|
54 |
{
|
55 |
"cell_type": "code",
|
56 |
+
"execution_count": 43,
|
57 |
"metadata": {
|
58 |
"slideshow": {
|
59 |
"slide_type": "skip"
|
|
|
64 |
"data": {
|
65 |
"application/mercury+json": {
|
66 |
"allow_download": true,
|
67 |
+
"code_uid": "App.0.40.24.1-rand0e93859a",
|
68 |
"continuous_update": false,
|
69 |
"description": "Recumpute everything dynamically",
|
70 |
"full_screen": true,
|
|
|
96 |
},
|
97 |
{
|
98 |
"cell_type": "code",
|
99 |
+
"execution_count": 44,
|
100 |
"metadata": {
|
101 |
"slideshow": {
|
102 |
"slide_type": "skip"
|
|
|
104 |
},
|
105 |
"outputs": [],
|
106 |
"source": [
|
107 |
+
" \n",
|
108 |
"# Read the features data from the the url of csv into pandas dataframes and rename the columns to F1, F2, F3, etc.\n",
|
109 |
"# Read the labels data from the url of csv into pandas dataframes and rename the columns to pass/fail and date/time\n",
|
110 |
"\n",
|
|
|
138 |
},
|
139 |
{
|
140 |
"cell_type": "code",
|
141 |
+
"execution_count": 45,
|
142 |
"metadata": {
|
143 |
"slideshow": {
|
144 |
"slide_type": "skip"
|
|
|
195 |
},
|
196 |
{
|
197 |
"cell_type": "code",
|
198 |
+
"execution_count": 46,
|
199 |
"metadata": {
|
200 |
"slideshow": {
|
201 |
"slide_type": "skip"
|
|
|
290 |
},
|
291 |
{
|
292 |
"cell_type": "code",
|
293 |
+
"execution_count": 47,
|
294 |
"metadata": {
|
295 |
"slideshow": {
|
296 |
"slide_type": "skip"
|
|
|
341 |
},
|
342 |
{
|
343 |
"cell_type": "code",
|
344 |
+
"execution_count": 48,
|
345 |
"metadata": {
|
346 |
"slideshow": {
|
347 |
"slide_type": "skip"
|
|
|
419 |
},
|
420 |
{
|
421 |
"cell_type": "code",
|
422 |
+
"execution_count": 49,
|
423 |
"metadata": {
|
424 |
"slideshow": {
|
425 |
"slide_type": "skip"
|
|
|
485 |
" return df_imputed\n"
|
486 |
]
|
487 |
},
|
488 |
+
{
|
489 |
+
"cell_type": "code",
|
490 |
+
"execution_count": 50,
|
491 |
+
"metadata": {},
|
492 |
+
"outputs": [],
|
493 |
+
"source": [
|
494 |
+
"def feature_selection(method, X_train, y_train):\n",
|
495 |
+
"\n",
|
496 |
+
" global feature_selection_var\n",
|
497 |
+
" global selected_features \n",
|
498 |
+
"\n",
|
499 |
+
" if method == 'boruta':\n",
|
500 |
+
" print('Selected method is: ', method)\n",
|
501 |
+
" from boruta import BorutaPy\n",
|
502 |
+
" from sklearn.ensemble import RandomForestClassifier\n",
|
503 |
+
" rf = RandomForestClassifier(n_estimators=100, n_jobs=-1)\n",
|
504 |
+
" boruta_selector = BorutaPy(rf,n_estimators='auto', verbose=0, random_state=42)\n",
|
505 |
+
" boruta_selector.fit(X_train.values, y_train.values.ravel())\n",
|
506 |
+
" selected_feature_indices = boruta_selector.support_\n",
|
507 |
+
" selected_columns = X_train.columns[selected_feature_indices]\n",
|
508 |
+
" X_train_filtered = X_train.iloc[:, selected_feature_indices]\n",
|
509 |
+
" print('Shape of the training set after feature selection with Boruta: ', X_train_filtered.shape)\n",
|
510 |
+
" return X_train_filtered, selected_columns\n",
|
511 |
+
" \n",
|
512 |
+
" if method == 'none':\n",
|
513 |
+
" print('Selected method is: ', method)\n",
|
514 |
+
" X_train_filtered = X_train\n",
|
515 |
+
" print('Shape of the training set after no feature selection: ', X_train_filtered.shape)\n",
|
516 |
+
" feature_selection_var = 'none'\n",
|
517 |
+
" selected_features = X_train_filtered.columns\n",
|
518 |
+
" return X_train_filtered, selected_features \n",
|
519 |
+
" \n",
|
520 |
+
" if method == 'lasso':\n",
|
521 |
+
" print('Selected method is: ', method)\n",
|
522 |
+
" from sklearn.linear_model import LassoCV\n",
|
523 |
+
" from sklearn.feature_selection import SelectFromModel\n",
|
524 |
+
" lasso = LassoCV().fit(X_train, y_train)\n",
|
525 |
+
" model = SelectFromModel(lasso, prefit=True)\n",
|
526 |
+
" X_train_filtered = model.transform(X_train)\n",
|
527 |
+
" selected_features = X_train.columns[model.get_support()]\n",
|
528 |
+
" print('Shape of the training set after feature selection with LassoCV: ', X_train_filtered.shape)\n",
|
529 |
+
" feature_selection_var = 'lasso'\n",
|
530 |
+
" return X_train_filtered, selected_features\n",
|
531 |
+
" \n",
|
532 |
+
" if method == 'pca':\n",
|
533 |
+
" print('Selected method is: ', method)\n",
|
534 |
+
" from sklearn.decomposition import PCA\n",
|
535 |
+
" pca = PCA(n_components=15)\n",
|
536 |
+
" X_train_pca = pca.fit_transform(X_train)\n",
|
537 |
+
" selected_features = X_train.columns[pca.explained_variance_ratio_.argsort()[::-1]][:15]\n",
|
538 |
+
" print('Shape of the training set after feature selection with PCA: ', X_train_pca.shape)\n",
|
539 |
+
" feature_selection_var = 'pca'\n",
|
540 |
+
" return X_train_pca, selected_features\n",
|
541 |
+
" \n",
|
542 |
+
" if method == 'rfe':\n",
|
543 |
+
" print('Selected method is: ', method)\n",
|
544 |
+
" from sklearn.feature_selection import RFE\n",
|
545 |
+
" from sklearn.ensemble import RandomForestClassifier\n",
|
546 |
+
" rfe_selector = RFE(estimator=RandomForestClassifier(n_estimators=100, n_jobs=-1), n_features_to_select=15, step=10, verbose=0)\n",
|
547 |
+
" rfe_selector.fit(X_train, y_train)\n",
|
548 |
+
" selected_features = X_train.columns[rfe_selector.support_]\n",
|
549 |
+
" X_train_filtered = X_train.iloc[:, rfe_selector.support_]\n",
|
550 |
+
" print('Shape of the training set after feature selection with RFE: ', X_train_filtered.shape)\n",
|
551 |
+
" feature_selection_var = 'rfe'\n",
|
552 |
+
" return X_train_filtered, selected_features\n",
|
553 |
+
" "
|
554 |
+
]
|
555 |
+
},
|
556 |
{
|
557 |
"attachments": {},
|
558 |
"cell_type": "markdown",
|
|
|
567 |
},
|
568 |
{
|
569 |
"cell_type": "code",
|
570 |
+
"execution_count": 51,
|
571 |
"metadata": {
|
572 |
"slideshow": {
|
573 |
"slide_type": "skip"
|
|
|
638 |
},
|
639 |
{
|
640 |
"cell_type": "code",
|
641 |
+
"execution_count": 52,
|
642 |
"metadata": {
|
643 |
"slideshow": {
|
644 |
"slide_type": "skip"
|
|
|
727 |
},
|
728 |
{
|
729 |
"cell_type": "code",
|
730 |
+
"execution_count": 53,
|
731 |
"metadata": {
|
732 |
"slideshow": {
|
733 |
"slide_type": "skip"
|
|
|
816 |
},
|
817 |
{
|
818 |
"cell_type": "code",
|
819 |
+
"execution_count": 54,
|
820 |
"metadata": {
|
821 |
"slideshow": {
|
822 |
"slide_type": "skip"
|
|
|
830 |
"yes",
|
831 |
"no"
|
832 |
],
|
833 |
+
"code_uid": "Select.0.40.16.25-rand98b210b9",
|
834 |
"disabled": false,
|
835 |
"hidden": false,
|
836 |
"label": "Drop Duplicates",
|
837 |
+
"model_id": "f2d0fb31478a477ea1b0d4c4aa80fb2e",
|
838 |
"url_key": "",
|
839 |
"value": "yes",
|
840 |
"widget": "Select"
|
841 |
},
|
842 |
"application/vnd.jupyter.widget-view+json": {
|
843 |
+
"model_id": "f2d0fb31478a477ea1b0d4c4aa80fb2e",
|
844 |
"version_major": 2,
|
845 |
"version_minor": 0
|
846 |
},
|
|
|
854 |
{
|
855 |
"data": {
|
856 |
"application/mercury+json": {
|
857 |
+
"code_uid": "Text.0.40.15.28-randc77c765e",
|
858 |
"disabled": false,
|
859 |
"hidden": false,
|
860 |
"label": "Missing Value Threeshold",
|
861 |
+
"model_id": "f97f46b880e3434082498e1e720b0b65",
|
862 |
"rows": 1,
|
863 |
"url_key": "",
|
864 |
"value": "80",
|
865 |
"widget": "Text"
|
866 |
},
|
867 |
"application/vnd.jupyter.widget-view+json": {
|
868 |
+
"model_id": "f97f46b880e3434082498e1e720b0b65",
|
869 |
"version_major": 2,
|
870 |
"version_minor": 0
|
871 |
},
|
|
|
879 |
{
|
880 |
"data": {
|
881 |
"application/mercury+json": {
|
882 |
+
"code_uid": "Text.0.40.15.31-rand54dd8817",
|
883 |
"disabled": false,
|
884 |
"hidden": false,
|
885 |
"label": "Variance Threshold",
|
886 |
+
"model_id": "4839b45f7e9b483d9fb8ee7fc05f1e19",
|
887 |
"rows": 1,
|
888 |
"url_key": "",
|
889 |
"value": "0",
|
890 |
"widget": "Text"
|
891 |
},
|
892 |
"application/vnd.jupyter.widget-view+json": {
|
893 |
+
"model_id": "4839b45f7e9b483d9fb8ee7fc05f1e19",
|
894 |
"version_major": 2,
|
895 |
"version_minor": 0
|
896 |
},
|
|
|
904 |
{
|
905 |
"data": {
|
906 |
"application/mercury+json": {
|
907 |
+
"code_uid": "Text.0.40.15.34-rand811824bd",
|
908 |
"disabled": false,
|
909 |
"hidden": false,
|
910 |
"label": "Correlation Threshold",
|
911 |
+
"model_id": "10033d424ab949f7b51462e444e17ba7",
|
912 |
"rows": 1,
|
913 |
"url_key": "",
|
914 |
"value": "1",
|
915 |
"widget": "Text"
|
916 |
},
|
917 |
"application/vnd.jupyter.widget-view+json": {
|
918 |
+
"model_id": "10033d424ab949f7b51462e444e17ba7",
|
919 |
"version_major": 2,
|
920 |
"version_minor": 0
|
921 |
},
|
|
|
935 |
4,
|
936 |
5
|
937 |
],
|
938 |
+
"code_uid": "Select.0.40.16.38-rand10d00d99",
|
939 |
"disabled": false,
|
940 |
"hidden": false,
|
941 |
"label": "Outlier Removal Threshold",
|
942 |
+
"model_id": "96b8980bceaf46459d9ec06c8fb7c818",
|
943 |
"url_key": "",
|
944 |
"value": "none",
|
945 |
"widget": "Select"
|
946 |
},
|
947 |
"application/vnd.jupyter.widget-view+json": {
|
948 |
+
"model_id": "96b8980bceaf46459d9ec06c8fb7c818",
|
949 |
"version_major": 2,
|
950 |
"version_minor": 0
|
951 |
},
|
|
|
966 |
"minmax",
|
967 |
"robust"
|
968 |
],
|
969 |
+
"code_uid": "Select.0.40.16.46-rand1bc79c9d",
|
970 |
"disabled": false,
|
971 |
"hidden": false,
|
972 |
"label": "Scaling Variables",
|
973 |
+
"model_id": "e7650cea7a834d588a995407052e1f2c",
|
974 |
"url_key": "",
|
975 |
"value": "none",
|
976 |
"widget": "Select"
|
977 |
},
|
978 |
"application/vnd.jupyter.widget-view+json": {
|
979 |
+
"model_id": "e7650cea7a834d588a995407052e1f2c",
|
980 |
"version_major": 2,
|
981 |
"version_minor": 0
|
982 |
},
|
|
|
996 |
"knn",
|
997 |
"most_frequent"
|
998 |
],
|
999 |
+
"code_uid": "Select.0.40.16.50-rand69ae31a0",
|
1000 |
"disabled": false,
|
1001 |
"hidden": false,
|
1002 |
"label": "Imputation Methods",
|
1003 |
+
"model_id": "652a64af16174970919183e6ab1c5b53",
|
1004 |
"url_key": "",
|
1005 |
"value": "mean",
|
1006 |
"widget": "Select"
|
1007 |
},
|
1008 |
"application/vnd.jupyter.widget-view+json": {
|
1009 |
+
"model_id": "652a64af16174970919183e6ab1c5b53",
|
1010 |
+
"version_major": 2,
|
1011 |
+
"version_minor": 0
|
1012 |
+
},
|
1013 |
+
"text/plain": [
|
1014 |
+
"mercury.Select"
|
1015 |
+
]
|
1016 |
+
},
|
1017 |
+
"metadata": {},
|
1018 |
+
"output_type": "display_data"
|
1019 |
+
},
|
1020 |
+
{
|
1021 |
+
"data": {
|
1022 |
+
"application/mercury+json": {
|
1023 |
+
"choices": [
|
1024 |
+
"none",
|
1025 |
+
"lasso",
|
1026 |
+
"rfe",
|
1027 |
+
"pca",
|
1028 |
+
"boruta"
|
1029 |
+
],
|
1030 |
+
"code_uid": "Select.0.40.16.55-rand148632f9",
|
1031 |
+
"disabled": false,
|
1032 |
+
"hidden": false,
|
1033 |
+
"label": "Feature Selection",
|
1034 |
+
"model_id": "9a1a199471314cd7a3363ea25d9d341a",
|
1035 |
+
"url_key": "",
|
1036 |
+
"value": "none",
|
1037 |
+
"widget": "Select"
|
1038 |
+
},
|
1039 |
+
"application/vnd.jupyter.widget-view+json": {
|
1040 |
+
"model_id": "9a1a199471314cd7a3363ea25d9d341a",
|
1041 |
"version_major": 2,
|
1042 |
"version_minor": 0
|
1043 |
},
|
|
|
1057 |
"undersampling",
|
1058 |
"rose"
|
1059 |
],
|
1060 |
+
"code_uid": "Select.0.40.16.59-rand3a34b3e3",
|
1061 |
"disabled": false,
|
1062 |
"hidden": false,
|
1063 |
"label": "Imbalance Treatment",
|
1064 |
+
"model_id": "358dd80171af4de2a944c3077b2f48d8",
|
1065 |
"url_key": "",
|
1066 |
"value": "none",
|
1067 |
"widget": "Select"
|
1068 |
},
|
1069 |
"application/vnd.jupyter.widget-view+json": {
|
1070 |
+
"model_id": "358dd80171af4de2a944c3077b2f48d8",
|
1071 |
"version_major": 2,
|
1072 |
"version_minor": 0
|
1073 |
},
|
|
|
1090 |
"decision_tree",
|
1091 |
"xgboost"
|
1092 |
],
|
1093 |
+
"code_uid": "Select.0.40.16.64-rand4b9cf5e0",
|
1094 |
"disabled": false,
|
1095 |
"hidden": false,
|
1096 |
"label": "Model Selection",
|
1097 |
+
"model_id": "8477ca5211bd4914861b3e48cda21c10",
|
1098 |
"url_key": "",
|
1099 |
"value": "random_forest",
|
1100 |
"widget": "Select"
|
1101 |
},
|
1102 |
"application/vnd.jupyter.widget-view+json": {
|
1103 |
+
"model_id": "8477ca5211bd4914861b3e48cda21c10",
|
1104 |
"version_major": 2,
|
1105 |
"version_minor": 0
|
1106 |
},
|
|
|
1167 |
"input_n_neighbors = 5 # only for knn imputation\n",
|
1168 |
"input_imputation_method = str(input_imputation_method.value)\n",
|
1169 |
"\n",
|
1170 |
+
"# input feature selection variables\n",
|
1171 |
+
"input_feature_selection = mr.Select(label=\"Feature Selection\", value=\"none\", choices=['none', 'lasso', 'rfe', 'pca', 'boruta']) # 'none', 'lasso', 'rfe', 'pca', 'boruta'\n",
|
1172 |
+
"input_feature_selection = str(input_feature_selection.value)\n",
|
1173 |
+
"\n",
|
1174 |
"# input imbalance treatment variables\n",
|
1175 |
"input_imbalance_treatment = mr.Select(label=\"Imbalance Treatment\", value=\"none\", choices=['none', 'smote', 'undersampling', 'rose']) # 'none', 'smote', 'undersampling', 'rose'\n",
|
1176 |
"input_imbalance_treatment = str(input_imbalance_treatment.value)\n",
|
|
|
1208 |
},
|
1209 |
{
|
1210 |
"cell_type": "code",
|
1211 |
+
"execution_count": 55,
|
1212 |
"metadata": {
|
1213 |
"slideshow": {
|
1214 |
"slide_type": "skip"
|
|
|
1286 |
},
|
1287 |
{
|
1288 |
"cell_type": "code",
|
1289 |
+
"execution_count": 56,
|
1290 |
"metadata": {
|
1291 |
"slideshow": {
|
1292 |
"slide_type": "skip"
|
|
|
1324 |
},
|
1325 |
{
|
1326 |
"cell_type": "code",
|
1327 |
+
"execution_count": 57,
|
1328 |
"metadata": {
|
1329 |
"slideshow": {
|
1330 |
"slide_type": "slide"
|