erjonb committed on
Commit
8fecf57
·
1 Parent(s): dea2a5b

Upload P2 - Secom Notebook - Mercury.ipynb

Browse files
Files changed (1) hide show
  1. P2 - Secom Notebook - Mercury.ipynb +147 -43
P2 - Secom Notebook - Mercury.ipynb CHANGED
@@ -26,7 +26,7 @@
26
  },
27
  {
28
  "cell_type": "code",
29
- "execution_count": 4,
30
  "metadata": {
31
  "slideshow": {
32
  "slide_type": "skip"
@@ -53,7 +53,7 @@
53
  },
54
  {
55
  "cell_type": "code",
56
- "execution_count": 5,
57
  "metadata": {
58
  "slideshow": {
59
  "slide_type": "skip"
@@ -64,7 +64,7 @@
64
  "data": {
65
  "application/mercury+json": {
66
  "allow_download": true,
67
- "code_uid": "App.0.40.24.1-randef62ebb1",
68
  "continuous_update": false,
69
  "description": "Recumpute everything dynamically",
70
  "full_screen": true,
@@ -96,7 +96,7 @@
96
  },
97
  {
98
  "cell_type": "code",
99
- "execution_count": 6,
100
  "metadata": {
101
  "slideshow": {
102
  "slide_type": "skip"
@@ -104,6 +104,7 @@
104
  },
105
  "outputs": [],
106
  "source": [
 
107
  "# Read the features data from the url of csv into pandas dataframes and rename the columns to F1, F2, F3, etc.\n",
108
  "# Read the labels data from the url of csv into pandas dataframes and rename the columns to pass/fail and date/time\n",
109
  "\n",
@@ -137,7 +138,7 @@
137
  },
138
  {
139
  "cell_type": "code",
140
- "execution_count": 7,
141
  "metadata": {
142
  "slideshow": {
143
  "slide_type": "skip"
@@ -194,7 +195,7 @@
194
  },
195
  {
196
  "cell_type": "code",
197
- "execution_count": 8,
198
  "metadata": {
199
  "slideshow": {
200
  "slide_type": "skip"
@@ -289,7 +290,7 @@
289
  },
290
  {
291
  "cell_type": "code",
292
- "execution_count": 9,
293
  "metadata": {
294
  "slideshow": {
295
  "slide_type": "skip"
@@ -340,7 +341,7 @@
340
  },
341
  {
342
  "cell_type": "code",
343
- "execution_count": 10,
344
  "metadata": {
345
  "slideshow": {
346
  "slide_type": "skip"
@@ -418,7 +419,7 @@
418
  },
419
  {
420
  "cell_type": "code",
421
- "execution_count": 11,
422
  "metadata": {
423
  "slideshow": {
424
  "slide_type": "skip"
@@ -484,6 +485,74 @@
484
  " return df_imputed\n"
485
  ]
486
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  {
488
  "attachments": {},
489
  "cell_type": "markdown",
@@ -498,7 +567,7 @@
498
  },
499
  {
500
  "cell_type": "code",
501
- "execution_count": 12,
502
  "metadata": {
503
  "slideshow": {
504
  "slide_type": "skip"
@@ -569,7 +638,7 @@
569
  },
570
  {
571
  "cell_type": "code",
572
- "execution_count": 13,
573
  "metadata": {
574
  "slideshow": {
575
  "slide_type": "skip"
@@ -658,7 +727,7 @@
658
  },
659
  {
660
  "cell_type": "code",
661
- "execution_count": 14,
662
  "metadata": {
663
  "slideshow": {
664
  "slide_type": "skip"
@@ -747,7 +816,7 @@
747
  },
748
  {
749
  "cell_type": "code",
750
- "execution_count": 15,
751
  "metadata": {
752
  "slideshow": {
753
  "slide_type": "skip"
@@ -761,17 +830,17 @@
761
  "yes",
762
  "no"
763
  ],
764
- "code_uid": "Select.0.40.16.25-rand77ec76da",
765
  "disabled": false,
766
  "hidden": false,
767
  "label": "Drop Duplicates",
768
- "model_id": "3287bcba5d3e42019072b9ba3c8cee67",
769
  "url_key": "",
770
  "value": "yes",
771
  "widget": "Select"
772
  },
773
  "application/vnd.jupyter.widget-view+json": {
774
- "model_id": "3287bcba5d3e42019072b9ba3c8cee67",
775
  "version_major": 2,
776
  "version_minor": 0
777
  },
@@ -785,18 +854,18 @@
785
  {
786
  "data": {
787
  "application/mercury+json": {
788
- "code_uid": "Text.0.40.15.28-rand0b79dd2f",
789
  "disabled": false,
790
  "hidden": false,
791
  "label": "Missing Value Threeshold",
792
- "model_id": "2927629f747d41209e38703c8dddc4cb",
793
  "rows": 1,
794
  "url_key": "",
795
  "value": "80",
796
  "widget": "Text"
797
  },
798
  "application/vnd.jupyter.widget-view+json": {
799
- "model_id": "2927629f747d41209e38703c8dddc4cb",
800
  "version_major": 2,
801
  "version_minor": 0
802
  },
@@ -810,18 +879,18 @@
810
  {
811
  "data": {
812
  "application/mercury+json": {
813
- "code_uid": "Text.0.40.15.31-rande8db764a",
814
  "disabled": false,
815
  "hidden": false,
816
  "label": "Variance Threshold",
817
- "model_id": "0832e29dacb44b0a8da9eccc6702999f",
818
  "rows": 1,
819
  "url_key": "",
820
  "value": "0",
821
  "widget": "Text"
822
  },
823
  "application/vnd.jupyter.widget-view+json": {
824
- "model_id": "0832e29dacb44b0a8da9eccc6702999f",
825
  "version_major": 2,
826
  "version_minor": 0
827
  },
@@ -835,18 +904,18 @@
835
  {
836
  "data": {
837
  "application/mercury+json": {
838
- "code_uid": "Text.0.40.15.34-randb2ccd11d",
839
  "disabled": false,
840
  "hidden": false,
841
  "label": "Correlation Threshold",
842
- "model_id": "b4688cca688c46b8a77ce5c0fc8a808f",
843
  "rows": 1,
844
  "url_key": "",
845
  "value": "1",
846
  "widget": "Text"
847
  },
848
  "application/vnd.jupyter.widget-view+json": {
849
- "model_id": "b4688cca688c46b8a77ce5c0fc8a808f",
850
  "version_major": 2,
851
  "version_minor": 0
852
  },
@@ -866,17 +935,17 @@
866
  4,
867
  5
868
  ],
869
- "code_uid": "Select.0.40.16.38-rand5391f652",
870
  "disabled": false,
871
  "hidden": false,
872
  "label": "Outlier Removal Threshold",
873
- "model_id": "8aa78a5763854991bff8c4e0ce199acc",
874
  "url_key": "",
875
  "value": "none",
876
  "widget": "Select"
877
  },
878
  "application/vnd.jupyter.widget-view+json": {
879
- "model_id": "8aa78a5763854991bff8c4e0ce199acc",
880
  "version_major": 2,
881
  "version_minor": 0
882
  },
@@ -897,17 +966,17 @@
897
  "minmax",
898
  "robust"
899
  ],
900
- "code_uid": "Select.0.40.16.46-rand581c3f74",
901
  "disabled": false,
902
  "hidden": false,
903
  "label": "Scaling Variables",
904
- "model_id": "9c7f7fe0460f45f5bdbf5b93e0e7e185",
905
  "url_key": "",
906
  "value": "none",
907
  "widget": "Select"
908
  },
909
  "application/vnd.jupyter.widget-view+json": {
910
- "model_id": "9c7f7fe0460f45f5bdbf5b93e0e7e185",
911
  "version_major": 2,
912
  "version_minor": 0
913
  },
@@ -927,17 +996,48 @@
927
  "knn",
928
  "most_frequent"
929
  ],
930
- "code_uid": "Select.0.40.16.50-randd879e6bf",
931
  "disabled": false,
932
  "hidden": false,
933
  "label": "Imputation Methods",
934
- "model_id": "c67be681353d4115a0f4f2df41ba8725",
935
  "url_key": "",
936
  "value": "mean",
937
  "widget": "Select"
938
  },
939
  "application/vnd.jupyter.widget-view+json": {
940
- "model_id": "c67be681353d4115a0f4f2df41ba8725",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
941
  "version_major": 2,
942
  "version_minor": 0
943
  },
@@ -957,17 +1057,17 @@
957
  "undersampling",
958
  "rose"
959
  ],
960
- "code_uid": "Select.0.40.16.55-randbc53979d",
961
  "disabled": false,
962
  "hidden": false,
963
  "label": "Imbalance Treatment",
964
- "model_id": "fa34ae8676274bb192425ae3901ca186",
965
  "url_key": "",
966
  "value": "none",
967
  "widget": "Select"
968
  },
969
  "application/vnd.jupyter.widget-view+json": {
970
- "model_id": "fa34ae8676274bb192425ae3901ca186",
971
  "version_major": 2,
972
  "version_minor": 0
973
  },
@@ -990,17 +1090,17 @@
990
  "decision_tree",
991
  "xgboost"
992
  ],
993
- "code_uid": "Select.0.40.16.60-rand0bc5431d",
994
  "disabled": false,
995
  "hidden": false,
996
  "label": "Model Selection",
997
- "model_id": "2b5c968f1dc74736910ad206a4c55af0",
998
  "url_key": "",
999
  "value": "random_forest",
1000
  "widget": "Select"
1001
  },
1002
  "application/vnd.jupyter.widget-view+json": {
1003
- "model_id": "2b5c968f1dc74736910ad206a4c55af0",
1004
  "version_major": 2,
1005
  "version_minor": 0
1006
  },
@@ -1067,6 +1167,10 @@
1067
  "input_n_neighbors = 5 # only for knn imputation\n",
1068
  "input_imputation_method = str(input_imputation_method.value)\n",
1069
  "\n",
 
 
 
 
1070
  "# input imbalance treatment variables\n",
1071
  "input_imbalance_treatment = mr.Select(label=\"Imbalance Treatment\", value=\"none\", choices=['none', 'smote', 'undersampling', 'rose']) # 'none', 'smote', 'undersampling', 'rose'\n",
1072
  "input_imbalance_treatment = str(input_imbalance_treatment.value)\n",
@@ -1104,7 +1208,7 @@
1104
  },
1105
  {
1106
  "cell_type": "code",
1107
- "execution_count": 16,
1108
  "metadata": {
1109
  "slideshow": {
1110
  "slide_type": "skip"
@@ -1182,7 +1286,7 @@
1182
  },
1183
  {
1184
  "cell_type": "code",
1185
- "execution_count": 17,
1186
  "metadata": {
1187
  "slideshow": {
1188
  "slide_type": "skip"
@@ -1220,7 +1324,7 @@
1220
  },
1221
  {
1222
  "cell_type": "code",
1223
- "execution_count": 18,
1224
  "metadata": {
1225
  "slideshow": {
1226
  "slide_type": "slide"
 
26
  },
27
  {
28
  "cell_type": "code",
29
+ "execution_count": 42,
30
  "metadata": {
31
  "slideshow": {
32
  "slide_type": "skip"
 
53
  },
54
  {
55
  "cell_type": "code",
56
+ "execution_count": 43,
57
  "metadata": {
58
  "slideshow": {
59
  "slide_type": "skip"
 
64
  "data": {
65
  "application/mercury+json": {
66
  "allow_download": true,
67
+ "code_uid": "App.0.40.24.1-rand0e93859a",
68
  "continuous_update": false,
69
  "description": "Recumpute everything dynamically",
70
  "full_screen": true,
 
96
  },
97
  {
98
  "cell_type": "code",
99
+ "execution_count": 44,
100
  "metadata": {
101
  "slideshow": {
102
  "slide_type": "skip"
 
104
  },
105
  "outputs": [],
106
  "source": [
107
+ " \n",
108
  "# Read the features data from the url of csv into pandas dataframes and rename the columns to F1, F2, F3, etc.\n",
109
  "# Read the labels data from the url of csv into pandas dataframes and rename the columns to pass/fail and date/time\n",
110
  "\n",
 
138
  },
139
  {
140
  "cell_type": "code",
141
+ "execution_count": 45,
142
  "metadata": {
143
  "slideshow": {
144
  "slide_type": "skip"
 
195
  },
196
  {
197
  "cell_type": "code",
198
+ "execution_count": 46,
199
  "metadata": {
200
  "slideshow": {
201
  "slide_type": "skip"
 
290
  },
291
  {
292
  "cell_type": "code",
293
+ "execution_count": 47,
294
  "metadata": {
295
  "slideshow": {
296
  "slide_type": "skip"
 
341
  },
342
  {
343
  "cell_type": "code",
344
+ "execution_count": 48,
345
  "metadata": {
346
  "slideshow": {
347
  "slide_type": "skip"
 
419
  },
420
  {
421
  "cell_type": "code",
422
+ "execution_count": 49,
423
  "metadata": {
424
  "slideshow": {
425
  "slide_type": "skip"
 
485
  " return df_imputed\n"
486
  ]
487
  },
488
+ {
489
+ "cell_type": "code",
490
+ "execution_count": 50,
491
+ "metadata": {},
492
+ "outputs": [],
493
+ "source": [
494
+ "def feature_selection(method, X_train, y_train):\n",
495
+ "\n",
496
+ " global feature_selection_var\n",
497
+ " global selected_features \n",
498
+ "\n",
499
+ " if method == 'boruta':\n",
500
+ " print('Selected method is: ', method)\n",
501
+ " from boruta import BorutaPy\n",
502
+ " from sklearn.ensemble import RandomForestClassifier\n",
503
+ " rf = RandomForestClassifier(n_estimators=100, n_jobs=-1)\n",
504
+ " boruta_selector = BorutaPy(rf,n_estimators='auto', verbose=0, random_state=42)\n",
505
+ " boruta_selector.fit(X_train.values, y_train.values.ravel())\n",
506
+ " selected_feature_indices = boruta_selector.support_\n",
507
+ " selected_columns = X_train.columns[selected_feature_indices]\n",
508
+ " X_train_filtered = X_train.iloc[:, selected_feature_indices]\n",
509
+ " print('Shape of the training set after feature selection with Boruta: ', X_train_filtered.shape)\n",
510
+ " return X_train_filtered, selected_columns\n",
511
+ " \n",
512
+ " if method == 'none':\n",
513
+ " print('Selected method is: ', method)\n",
514
+ " X_train_filtered = X_train\n",
515
+ " print('Shape of the training set after no feature selection: ', X_train_filtered.shape)\n",
516
+ " feature_selection_var = 'none'\n",
517
+ " selected_features = X_train_filtered.columns\n",
518
+ " return X_train_filtered, selected_features \n",
519
+ " \n",
520
+ " if method == 'lasso':\n",
521
+ " print('Selected method is: ', method)\n",
522
+ " from sklearn.linear_model import LassoCV\n",
523
+ " from sklearn.feature_selection import SelectFromModel\n",
524
+ " lasso = LassoCV().fit(X_train, y_train)\n",
525
+ " model = SelectFromModel(lasso, prefit=True)\n",
526
+ " X_train_filtered = model.transform(X_train)\n",
527
+ " selected_features = X_train.columns[model.get_support()]\n",
528
+ " print('Shape of the training set after feature selection with LassoCV: ', X_train_filtered.shape)\n",
529
+ " feature_selection_var = 'lasso'\n",
530
+ " return X_train_filtered, selected_features\n",
531
+ " \n",
532
+ " if method == 'pca':\n",
533
+ " print('Selected method is: ', method)\n",
534
+ " from sklearn.decomposition import PCA\n",
535
+ " pca = PCA(n_components=15)\n",
536
+ " X_train_pca = pca.fit_transform(X_train)\n",
537
+ " selected_features = X_train.columns[pca.explained_variance_ratio_.argsort()[::-1]][:15]\n",
538
+ " print('Shape of the training set after feature selection with PCA: ', X_train_pca.shape)\n",
539
+ " feature_selection_var = 'pca'\n",
540
+ " return X_train_pca, selected_features\n",
541
+ " \n",
542
+ " if method == 'rfe':\n",
543
+ " print('Selected method is: ', method)\n",
544
+ " from sklearn.feature_selection import RFE\n",
545
+ " from sklearn.ensemble import RandomForestClassifier\n",
546
+ " rfe_selector = RFE(estimator=RandomForestClassifier(n_estimators=100, n_jobs=-1), n_features_to_select=15, step=10, verbose=0)\n",
547
+ " rfe_selector.fit(X_train, y_train)\n",
548
+ " selected_features = X_train.columns[rfe_selector.support_]\n",
549
+ " X_train_filtered = X_train.iloc[:, rfe_selector.support_]\n",
550
+ " print('Shape of the training set after feature selection with RFE: ', X_train_filtered.shape)\n",
551
+ " feature_selection_var = 'rfe'\n",
552
+ " return X_train_filtered, selected_features\n",
553
+ " "
554
+ ]
555
+ },
556
  {
557
  "attachments": {},
558
  "cell_type": "markdown",
 
567
  },
568
  {
569
  "cell_type": "code",
570
+ "execution_count": 51,
571
  "metadata": {
572
  "slideshow": {
573
  "slide_type": "skip"
 
638
  },
639
  {
640
  "cell_type": "code",
641
+ "execution_count": 52,
642
  "metadata": {
643
  "slideshow": {
644
  "slide_type": "skip"
 
727
  },
728
  {
729
  "cell_type": "code",
730
+ "execution_count": 53,
731
  "metadata": {
732
  "slideshow": {
733
  "slide_type": "skip"
 
816
  },
817
  {
818
  "cell_type": "code",
819
+ "execution_count": 54,
820
  "metadata": {
821
  "slideshow": {
822
  "slide_type": "skip"
 
830
  "yes",
831
  "no"
832
  ],
833
+ "code_uid": "Select.0.40.16.25-rand98b210b9",
834
  "disabled": false,
835
  "hidden": false,
836
  "label": "Drop Duplicates",
837
+ "model_id": "f2d0fb31478a477ea1b0d4c4aa80fb2e",
838
  "url_key": "",
839
  "value": "yes",
840
  "widget": "Select"
841
  },
842
  "application/vnd.jupyter.widget-view+json": {
843
+ "model_id": "f2d0fb31478a477ea1b0d4c4aa80fb2e",
844
  "version_major": 2,
845
  "version_minor": 0
846
  },
 
854
  {
855
  "data": {
856
  "application/mercury+json": {
857
+ "code_uid": "Text.0.40.15.28-randc77c765e",
858
  "disabled": false,
859
  "hidden": false,
860
  "label": "Missing Value Threeshold",
861
+ "model_id": "f97f46b880e3434082498e1e720b0b65",
862
  "rows": 1,
863
  "url_key": "",
864
  "value": "80",
865
  "widget": "Text"
866
  },
867
  "application/vnd.jupyter.widget-view+json": {
868
+ "model_id": "f97f46b880e3434082498e1e720b0b65",
869
  "version_major": 2,
870
  "version_minor": 0
871
  },
 
879
  {
880
  "data": {
881
  "application/mercury+json": {
882
+ "code_uid": "Text.0.40.15.31-rand54dd8817",
883
  "disabled": false,
884
  "hidden": false,
885
  "label": "Variance Threshold",
886
+ "model_id": "4839b45f7e9b483d9fb8ee7fc05f1e19",
887
  "rows": 1,
888
  "url_key": "",
889
  "value": "0",
890
  "widget": "Text"
891
  },
892
  "application/vnd.jupyter.widget-view+json": {
893
+ "model_id": "4839b45f7e9b483d9fb8ee7fc05f1e19",
894
  "version_major": 2,
895
  "version_minor": 0
896
  },
 
904
  {
905
  "data": {
906
  "application/mercury+json": {
907
+ "code_uid": "Text.0.40.15.34-rand811824bd",
908
  "disabled": false,
909
  "hidden": false,
910
  "label": "Correlation Threshold",
911
+ "model_id": "10033d424ab949f7b51462e444e17ba7",
912
  "rows": 1,
913
  "url_key": "",
914
  "value": "1",
915
  "widget": "Text"
916
  },
917
  "application/vnd.jupyter.widget-view+json": {
918
+ "model_id": "10033d424ab949f7b51462e444e17ba7",
919
  "version_major": 2,
920
  "version_minor": 0
921
  },
 
935
  4,
936
  5
937
  ],
938
+ "code_uid": "Select.0.40.16.38-rand10d00d99",
939
  "disabled": false,
940
  "hidden": false,
941
  "label": "Outlier Removal Threshold",
942
+ "model_id": "96b8980bceaf46459d9ec06c8fb7c818",
943
  "url_key": "",
944
  "value": "none",
945
  "widget": "Select"
946
  },
947
  "application/vnd.jupyter.widget-view+json": {
948
+ "model_id": "96b8980bceaf46459d9ec06c8fb7c818",
949
  "version_major": 2,
950
  "version_minor": 0
951
  },
 
966
  "minmax",
967
  "robust"
968
  ],
969
+ "code_uid": "Select.0.40.16.46-rand1bc79c9d",
970
  "disabled": false,
971
  "hidden": false,
972
  "label": "Scaling Variables",
973
+ "model_id": "e7650cea7a834d588a995407052e1f2c",
974
  "url_key": "",
975
  "value": "none",
976
  "widget": "Select"
977
  },
978
  "application/vnd.jupyter.widget-view+json": {
979
+ "model_id": "e7650cea7a834d588a995407052e1f2c",
980
  "version_major": 2,
981
  "version_minor": 0
982
  },
 
996
  "knn",
997
  "most_frequent"
998
  ],
999
+ "code_uid": "Select.0.40.16.50-rand69ae31a0",
1000
  "disabled": false,
1001
  "hidden": false,
1002
  "label": "Imputation Methods",
1003
+ "model_id": "652a64af16174970919183e6ab1c5b53",
1004
  "url_key": "",
1005
  "value": "mean",
1006
  "widget": "Select"
1007
  },
1008
  "application/vnd.jupyter.widget-view+json": {
1009
+ "model_id": "652a64af16174970919183e6ab1c5b53",
1010
+ "version_major": 2,
1011
+ "version_minor": 0
1012
+ },
1013
+ "text/plain": [
1014
+ "mercury.Select"
1015
+ ]
1016
+ },
1017
+ "metadata": {},
1018
+ "output_type": "display_data"
1019
+ },
1020
+ {
1021
+ "data": {
1022
+ "application/mercury+json": {
1023
+ "choices": [
1024
+ "none",
1025
+ "lasso",
1026
+ "rfe",
1027
+ "pca",
1028
+ "boruta"
1029
+ ],
1030
+ "code_uid": "Select.0.40.16.55-rand148632f9",
1031
+ "disabled": false,
1032
+ "hidden": false,
1033
+ "label": "Feature Selection",
1034
+ "model_id": "9a1a199471314cd7a3363ea25d9d341a",
1035
+ "url_key": "",
1036
+ "value": "none",
1037
+ "widget": "Select"
1038
+ },
1039
+ "application/vnd.jupyter.widget-view+json": {
1040
+ "model_id": "9a1a199471314cd7a3363ea25d9d341a",
1041
  "version_major": 2,
1042
  "version_minor": 0
1043
  },
 
1057
  "undersampling",
1058
  "rose"
1059
  ],
1060
+ "code_uid": "Select.0.40.16.59-rand3a34b3e3",
1061
  "disabled": false,
1062
  "hidden": false,
1063
  "label": "Imbalance Treatment",
1064
+ "model_id": "358dd80171af4de2a944c3077b2f48d8",
1065
  "url_key": "",
1066
  "value": "none",
1067
  "widget": "Select"
1068
  },
1069
  "application/vnd.jupyter.widget-view+json": {
1070
+ "model_id": "358dd80171af4de2a944c3077b2f48d8",
1071
  "version_major": 2,
1072
  "version_minor": 0
1073
  },
 
1090
  "decision_tree",
1091
  "xgboost"
1092
  ],
1093
+ "code_uid": "Select.0.40.16.64-rand4b9cf5e0",
1094
  "disabled": false,
1095
  "hidden": false,
1096
  "label": "Model Selection",
1097
+ "model_id": "8477ca5211bd4914861b3e48cda21c10",
1098
  "url_key": "",
1099
  "value": "random_forest",
1100
  "widget": "Select"
1101
  },
1102
  "application/vnd.jupyter.widget-view+json": {
1103
+ "model_id": "8477ca5211bd4914861b3e48cda21c10",
1104
  "version_major": 2,
1105
  "version_minor": 0
1106
  },
 
1167
  "input_n_neighbors = 5 # only for knn imputation\n",
1168
  "input_imputation_method = str(input_imputation_method.value)\n",
1169
  "\n",
1170
+ "# input feature selection variables\n",
1171
+ "input_feature_selection = mr.Select(label=\"Feature Selection\", value=\"none\", choices=['none', 'lasso', 'rfe', 'pca', 'boruta']) # 'none', 'lasso', 'rfe', 'pca', 'boruta'\n",
1172
+ "input_feature_selection = str(input_feature_selection.value)\n",
1173
+ "\n",
1174
  "# input imbalance treatment variables\n",
1175
  "input_imbalance_treatment = mr.Select(label=\"Imbalance Treatment\", value=\"none\", choices=['none', 'smote', 'undersampling', 'rose']) # 'none', 'smote', 'undersampling', 'rose'\n",
1176
  "input_imbalance_treatment = str(input_imbalance_treatment.value)\n",
 
1208
  },
1209
  {
1210
  "cell_type": "code",
1211
+ "execution_count": 55,
1212
  "metadata": {
1213
  "slideshow": {
1214
  "slide_type": "skip"
 
1286
  },
1287
  {
1288
  "cell_type": "code",
1289
+ "execution_count": 56,
1290
  "metadata": {
1291
  "slideshow": {
1292
  "slide_type": "skip"
 
1324
  },
1325
  {
1326
  "cell_type": "code",
1327
+ "execution_count": 57,
1328
  "metadata": {
1329
  "slideshow": {
1330
  "slide_type": "slide"