dperales committed on
Commit
c2f025a
·
1 Parent(s): 09b69ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +258 -250
app.py CHANGED
@@ -13,259 +13,267 @@ from PIL import ImageColor
13
  from PIL import ImageDraw
14
  from PIL import ImageFont
15
 
16
- hide_streamlit_style = """
17
- <style>
18
- #MainMenu {visibility: hidden;}
19
- footer {visibility: hidden;}
20
- </style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  """
22
- st.markdown(hide_streamlit_style, unsafe_allow_html=True)
23
-
24
- with st.sidebar:
25
- image = Image.open('itaca_logo.png')
26
- st.image(image, width=150) #,use_column_width=True)
27
- page = option_menu(menu_title='Menu',
28
- menu_icon="robot",
29
- options=["Clustering Analysis",
30
- "Anomaly Detection"],
31
- icons=["chat-dots",
32
- "key"],
33
- default_index=0
34
- )
35
-
36
- # Additional section below the option menu
37
- # st.markdown("---") # Add a separator line
38
- st.header("Settings")
39
-
40
- # Define the options for the dropdown list
41
- numclusters = [2, 3, 4, 5, 6]
42
- # selected_clusters = st.selectbox("Choose a number of clusters", numclusters)
43
- selected_clusters = st.slider("Choose a number of clusters", min_value=2, max_value=10, value=4)
44
-
45
- p_remove_multicollinearity = st.checkbox("Remove Multicollinearity", value=False)
46
- p_multicollinearity_threshold = st.slider("Choose multicollinearity thresholds", min_value=0.0, max_value=1.0, value=0.9)
47
- # p_remove_outliers = st.checkbox("Remove Outliers", value=False)
48
- # p_outliers_method = st.selectbox ("Choose an Outlier Method", ["iforest", "ee", "lof"])
49
- p_transformation = st.checkbox("Choose Power Transform", value = False)
50
- p_normalize = st.checkbox("Choose Normalize", value = False)
51
- p_pca = st.checkbox("Choose PCA", value = False)
52
- p_pca_method = st.selectbox ("Choose a PCA Method", ["linear", "kernel", "incremental"])
53
-
54
- st.title('ITACA Insurance Core AI Module')
55
-
56
- if page == "Clustering Analysis":
57
- st.header('Clustering Analysis')
58
-
59
- st.write(
60
- """
61
- """
62
- )
63
-
64
- # import pycaret unsupervised models
65
- from pycaret.clustering import *
66
- # import ClusteringExperiment
67
- from pycaret.clustering import ClusteringExperiment
68
-
69
- # Display the list of CSV files
70
- directory = "./"
71
- all_files = os.listdir(directory)
72
- # Filter files to only include CSV files
73
- csv_files = [file for file in all_files if file.endswith(".csv")]
74
- # Select a CSV file from the list
75
- selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)
76
-
77
- # Upload the CSV file
78
- uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
79
-
80
- # Define the unsupervised model
81
- clusteringmodel = ['kmeans', 'ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics', 'birch']
82
- selected_model = st.selectbox("Choose a clustering model", clusteringmodel)
83
-
84
- # Read and display the CSV file
85
- if selected_csv != "None" or uploaded_file is not None:
86
- if uploaded_file:
87
- try:
88
- delimiter = ','
89
- insurance_claims = pd.read_csv (uploaded_file, sep=delimiter)
90
- except ValueError:
91
- delimiter = '|'
92
- insurance_claims = pd.read_csv (uploaded_file, sep=delimiter, encoding='latin-1')
93
- else:
94
- insurance_claims = pd.read_csv(selected_csv)
95
-
96
- st.header("Inference Description")
97
- insurance_claims.describe().T
98
-
99
- cat_col = insurance_claims.select_dtypes(include=['object']).columns
100
- num_col = insurance_claims.select_dtypes(exclude=['object']).columns
101
-
102
- # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
103
- # Calculate the correlation matrix
104
- corr_matrix = insurance_claims[num_col].corr()
105
- # Create a Matplotlib figure
106
- fig, ax = plt.subplots(figsize=(12, 8))
107
- # Create a heatmap using seaborn
108
- st.header("Heat Map")
109
- sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
110
- # Set the title for the heatmap
111
- ax.set_title('Correlation Heatmap')
112
- # Display the heatmap in Streamlit
113
- st.pyplot(fig)
114
-
115
- all_columns = insurance_claims.columns.tolist()
116
- selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
117
-
118
- if st.button("Prediction"):
119
- insurance_claims = insurance_claims[selected_columns].copy()
120
-
121
- s = setup(insurance_claims, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
122
- # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
123
- transformation=p_transformation,
124
- normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
125
- exp_clustering = ClusteringExperiment()
126
- # init setup on exp
127
- exp_clustering.setup(insurance_claims, session_id = 123)
128
-
129
- with st.spinner("Analyzing..."):
130
- # train kmeans model
131
- cluster_model = create_model(selected_model, num_clusters = selected_clusters)
132
-
133
- cluster_model_2 = assign_model(cluster_model)
134
- # Calculate summary statistics for each cluster
135
- cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max',
136
- 'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
137
- ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
138
- st.header("Cluster Summary")
139
- cluster_summary
140
- st.header("Assign Model")
141
- cluster_model_2
142
-
143
- # all_metrics = get_metrics()
144
- # all_metrics
145
-
146
- st.header("Clustering Metrics")
147
- cluster_results = pull()
148
- cluster_results
149
-
150
- st.header("Clustering Plots")
151
- # plot pca cluster plot
152
- # plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
153
-
154
- # if selected_model != 'ap':
155
- # plot_model(cluster_model, plot = 'tsne', display_format = 'streamlit')
156
-
157
- # if selected_model not in ('ap', 'meanshift', 'dbscan', 'optics'):
158
- # plot_model(cluster_model, plot = 'elbow', display_format = 'streamlit')
159
-
160
- # if selected_model not in ('ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics'):
161
- # plot_model(cluster_model, plot = 'silhouette', display_format = 'streamlit')
162
-
163
- # if selected_model not in ('ap', 'sc', 'hclust', 'dbscan', 'optics', 'birch'):
164
- # plot_model(cluster_model, plot = 'distance', display_format = 'streamlit')
165
 
166
- # if selected_model != 'ap':
167
- # plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')
168
-
169
- # Create a Classification Model to extract feature importance
170
- st.header("Feature Importance")
171
- from pycaret.classification import *
172
- s = setup(cluster_model_2, target = 'Cluster')
173
- lr = create_model('lr')
174
- # this is how you can recreate the table
175
- print("Number of columns in X_train:", len(get_config('X_train').columns))
176
- print("Number of coefficients in lr:", len(lr.coef_[0]))
177
- feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
178
- # sort by feature importance value and filter top 10
179
- feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
180
- # Display the filtered table in Streamlit
181
- # st.dataframe(feat_imp)
182
- # Display the filtered table as a bar chart in Streamlit
183
- st.bar_chart(feat_imp.set_index('Feature'))
184
-
185
- elif page == "Anomaly Detection":
186
- st.header('Anomaly Detection')
187
-
188
- st.write(
189
- """
190
- """
191
- )
192
-
193
- # import pycaret anomaly
194
- from pycaret.anomaly import *
195
- # import AnomalyExperiment
196
- from pycaret.anomaly import AnomalyExperiment
197
-
198
- # Display the list of CSV files
199
- directory = "./"
200
- all_files = os.listdir(directory)
201
- # Filter files to only include CSV files
202
- csv_files = [file for file in all_files if file.endswith(".csv")]
203
- # Select a CSV file from the list
204
- selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)
205
-
206
- # Upload the CSV file
207
- uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
208
-
209
- # Define the unsupervised model
210
- anomalymodel = ['abod', 'cluster', 'cof', 'iforest', 'histogram', 'knn', 'lof', 'svm', 'pca', 'mcd', 'sod', 'sos']
211
- selected_model = st.selectbox("Choose an anomaly model", anomalymodel)
212
-
213
- # Read and display the CSV file
214
- if selected_csv != "None" or uploaded_file is not None:
215
- if uploaded_file:
216
- try:
217
- delimiter = ','
218
- insurance_claims = pd.read_csv (uploaded_file, sep=delimiter)
219
- except ValueError:
220
- delimiter = '|'
221
- insurance_claims = pd.read_csv (uploaded_file, sep=delimiter, encoding='latin-1')
222
- else:
223
- insurance_claims = pd.read_csv(selected_csv)
224
-
225
- all_columns = insurance_claims.columns.tolist()
226
- selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
227
-
228
- if st.button("Prediction"):
229
- insurance_claims = insurance_claims[selected_columns].copy()
230
-
231
- # s = setup(insurance_claims, session_id = 123)
232
-
233
- s = setup(insurance_claims, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
234
  # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
235
  transformation=p_transformation,
236
  normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
237
-
238
- exp_anomaly = AnomalyExperiment()
239
- # init setup on exp
240
- exp_anomaly.setup(insurance_claims, session_id = 123)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
 
242
- with st.spinner("Analyzing..."):
243
- # train model
244
- anomaly_model = create_model(selected_model)
245
-
246
- st.header("Assign Model")
247
- anomaly_model_2 = assign_model(anomaly_model)
248
- anomaly_model_2
249
-
250
- st.header("Anomaly Metrics")
251
- anomaly_results = pull()
252
- anomaly_results
253
-
254
- # plot
255
- st.header("Anomaly Plots")
256
- plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
257
- plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')
258
-
259
- # Create a Classification Model to extract feature importance
260
- st.header("Feature Importance")
261
- from pycaret.classification import *
262
- s = setup(anomaly_model_2, target = 'Anomaly')
263
- lr = create_model('lr')
264
- # this is how you can recreate the table
265
- feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
266
- # sort by feature importance value and filter top 10
267
- feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
268
- # Display the filtered table in Streamlit
269
- # st.dataframe(feat_imp)
270
- # Display the filtered table as a bar chart in Streamlit
271
- st.bar_chart(feat_imp.set_index('Feature'))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  from PIL import ImageDraw
14
  from PIL import ImageFont
15
 
16
# Clustering plot name -> estimator ids for which that plot is skipped.
_CLUSTER_PLOTS = (
    ("cluster", ()),
    ("tsne", ("ap",)),
    ("elbow", ("ap", "meanshift", "dbscan", "optics")),
    ("silhouette", ("ap", "meanshift", "sc", "hclust", "dbscan", "optics")),
    ("distance", ("ap", "sc", "hclust", "dbscan", "optics", "birch")),
    ("distribution", ("ap",)),
)


def _read_uploaded_csv(uploaded_file):
    """Parse an uploaded CSV, falling back to a '|'-delimited latin-1 read.

    Args:
        uploaded_file: Seekable binary file-like object holding the CSV.

    Returns:
        pandas.DataFrame with the parsed contents.

    Raises:
        ValueError: if the fallback parse fails as well (pandas parser and
            decoding errors are ValueError subclasses).
    """
    try:
        return pd.read_csv(uploaded_file, sep=",")
    except ValueError:
        # The failed attempt has already consumed the buffer; rewind it so the
        # fallback parse sees the file from its first byte.
        uploaded_file.seek(0)
        return pd.read_csv(uploaded_file, sep="|", encoding="latin-1")


def _pick_csv_source():
    """Render the CSV pickers and return ``(selected_csv, uploaded_file)``.

    ``selected_csv`` is the literal string "None" when no local file is chosen;
    ``uploaded_file`` is None when nothing has been uploaded.
    """
    csv_files = [name for name in os.listdir("./") if name.endswith(".csv")]
    selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    return selected_csv, uploaded_file


def _load_claims(selected_csv, uploaded_file):
    """Load the chosen dataset; an upload takes precedence over a local file.

    Returns:
        pandas.DataFrame, or None when no source has been chosen yet.
    """
    if uploaded_file:
        return _read_uploaded_csv(uploaded_file)
    if selected_csv != "None":
        return pd.read_csv(selected_csv)
    return None


def _sidebar_settings():
    """Render the sidebar and return ``(page, cfg)``.

    ``cfg`` holds ``graph_select``, ``feat_imp_select``, ``selected_clusters``
    and ``preprocessing`` (keyword arguments forwarded to PyCaret ``setup``).
    """
    # NOTE(review): the settings widgets are assumed to live inside the
    # sidebar, below the option menu -- confirm against the deployed layout.
    with st.sidebar:
        st.image(Image.open('itaca_logo.png'), width=150)
        page = option_menu(
            menu_title='Menu',
            menu_icon="robot",
            options=["Clustering Analysis", "Anomaly Detection"],
            icons=["chat-dots", "key"],
            default_index=0,
        )

        st.header("Settings")
        graph_select = st.checkbox("Show Graphics", value=True)
        feat_imp_select = st.checkbox("Feature Importance", value=False)
        selected_clusters = st.slider(
            "Choose a number of clusters", min_value=2, max_value=10, value=4
        )

        # Dict entries are evaluated top to bottom, so the widgets keep their
        # on-screen order.
        preprocessing = {
            "remove_multicollinearity": st.checkbox("Remove Multicollinearity", value=False),
            "multicollinearity_threshold": st.slider(
                "Choose multicollinearity thresholds", min_value=0.0, max_value=1.0, value=0.9
            ),
            "transformation": st.checkbox("Choose Power Transform", value=False),
            "normalize": st.checkbox("Choose Normalize", value=False),
            "pca": st.checkbox("Choose PCA", value=False),
            "pca_method": st.selectbox(
                "Choose a PCA Method", ["linear", "kernel", "incremental"]
            ),
        }

    cfg = {
        "graph_select": graph_select,
        "feat_imp_select": feat_imp_select,
        "selected_clusters": selected_clusters,
        "preprocessing": preprocessing,
    }
    return page, cfg


def _show_feature_importance(labelled, target):
    """Fit a logistic regression on ``target`` and chart the top-10 features.

    Args:
        labelled: DataFrame that contains the label column ``target``.
        target: Name of the label column to predict.
    """
    st.header("Feature Importance")
    # Aliased so the classification helpers cannot be confused with the
    # clustering/anomaly functions of the same name used by the callers.
    from pycaret.classification import create_model as create_classifier
    from pycaret.classification import get_config
    from pycaret.classification import setup as setup_classifier

    setup_classifier(labelled, target=target)
    lr = create_classifier('lr')

    # The estimator is fitted on the transformed training set, so feature names
    # must come from there; the raw X_train can have a different column count
    # (e.g. after encoding), which would make the DataFrame below fail.
    features = get_config('X_train_transformed').columns
    # coef_ has one row per class for multiclass targets (a single row for
    # binary ones); average the magnitudes so every class contributes.
    weights = abs(lr.coef_).mean(axis=0)

    feat_imp = (
        pd.DataFrame({'Feature': features, 'Value': weights})
        .sort_values(by='Value', ascending=False)
        .head(10)
    )
    st.bar_chart(feat_imp.set_index('Feature'))


def _clustering_page(cfg):
    """Render the "Clustering Analysis" page using the sidebar settings ``cfg``."""
    st.header('Clustering Analysis')

    from pycaret.clustering import assign_model, create_model, plot_model, pull, setup

    selected_csv, uploaded_file = _pick_csv_source()
    selected_model = st.selectbox(
        "Choose a clustering model",
        ['kmeans', 'ap', 'meanshift', 'sc', 'hclust', 'dbscan', 'optics', 'birch'],
    )

    insurance_claims = _load_claims(selected_csv, uploaded_file)
    if insurance_claims is None:
        return

    st.header("Inference Description")
    # Explicit st.write: does not depend on Streamlit "magic" being enabled.
    st.write(insurance_claims.describe().T)

    num_col = insurance_claims.select_dtypes(exclude=['object']).columns
    corr_matrix = insurance_claims[num_col].corr()
    fig, ax = plt.subplots(figsize=(12, 8))
    st.header("Heat Map")
    sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
    ax.set_title('Correlation Heatmap')
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # pyplot does not keep one open per rerun.
    plt.close(fig)

    all_columns = insurance_claims.columns.tolist()
    selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)

    if not st.button("Prediction"):
        return

    insurance_claims = insurance_claims[selected_columns].copy()

    # One functional setup() is enough: the create_model/assign_model/pull
    # calls below operate on the experiment it registers.
    setup(insurance_claims, session_id=123, **cfg["preprocessing"])

    with st.spinner("Analyzing..."):
        cluster_model = create_model(selected_model, num_clusters=cfg["selected_clusters"])
        cluster_model_2 = assign_model(cluster_model)

        # Summary statistics only make sense for numeric columns; aggregating
        # text columns with 'mean'/'std' raises in recent pandas.
        numeric_cols = cluster_model_2.select_dtypes(include="number").columns
        cluster_summary = cluster_model_2.groupby('Cluster')[numeric_cols].agg(
            ['count', 'mean', 'median', 'min', 'max',
             'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
             ('quantile_75', lambda x: x.quantile(0.75)), 'skew']
        )
        st.header("Cluster Summary")
        st.write(cluster_summary)
        st.header("Assign Model")
        st.write(cluster_model_2)

        st.header("Clustering Metrics")
        st.write(pull())

        if cfg["graph_select"]:
            st.header("Clustering Plots")
            for plot_name, skipped_models in _CLUSTER_PLOTS:
                if selected_model not in skipped_models:
                    plot_model(cluster_model, plot=plot_name, display_format='streamlit')

        if cfg["feat_imp_select"]:
            _show_feature_importance(cluster_model_2, 'Cluster')


def _anomaly_page(cfg):
    """Render the "Anomaly Detection" page using the sidebar settings ``cfg``."""
    st.header('Anomaly Detection')

    from pycaret.anomaly import assign_model, create_model, plot_model, pull, setup

    selected_csv, uploaded_file = _pick_csv_source()
    selected_model = st.selectbox(
        "Choose an anomaly model",
        ['abod', 'cluster', 'cof', 'iforest', 'histogram', 'knn', 'lof', 'svm',
         'pca', 'mcd', 'sod', 'sos'],
    )

    insurance_claims = _load_claims(selected_csv, uploaded_file)
    if insurance_claims is None:
        return

    all_columns = insurance_claims.columns.tolist()
    selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)

    if not st.button("Prediction"):
        return

    insurance_claims = insurance_claims[selected_columns].copy()

    # One functional setup() is enough: the create_model/assign_model/pull
    # calls below operate on the experiment it registers.
    setup(insurance_claims, session_id=123, **cfg["preprocessing"])

    with st.spinner("Analyzing..."):
        anomaly_model = create_model(selected_model)

        st.header("Assign Model")
        anomaly_model_2 = assign_model(anomaly_model)
        st.write(anomaly_model_2)

        st.header("Anomaly Metrics")
        st.write(pull())

        if cfg["graph_select"]:
            st.header("Anomaly Plots")
            plot_model(anomaly_model, plot='tsne', display_format='streamlit')
            plot_model(anomaly_model, plot='umap', display_format='streamlit')

        if cfg["feat_imp_select"]:
            # The anomaly score is what the Anomaly label is derived from;
            # leaving it in would let it dominate the importance ranking.
            features_only = anomaly_model_2.drop(columns=['Anomaly_Score'], errors='ignore')
            _show_feature_importance(features_only, 'Anomaly')


def main():
    """Render the ITACA Streamlit app: sidebar settings plus the selected page."""
    hide_streamlit_style = """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)

    page, cfg = _sidebar_settings()

    st.title('ITACA Insurance Core AI Module')

    if page == "Clustering Analysis":
        _clustering_page(cfg)
    elif page == "Anomaly Detection":
        _anomaly_page(cfg)
276
# Streamlit executes the app script with __name__ set to "__main__"; the guard
# keeps a plain `import` of this module from launching the UI.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Top-level boundary: report the failure in the page instead of
        # letting the script die with a raw traceback.
        st.error(f"An error occurred: {e}")