dperales committed
Commit 9395525 · 1 Parent(s): d41e58c

Update app.py

Files changed (1): app.py +116 -116
app.py CHANGED
@@ -142,7 +142,7 @@ def main():
 
         with st.spinner("Analyzing..."):
           #with col2:
-          st.markdown("<br><br><br><br>", unsafe_allow_html=True)
+          #st.markdown("<br><br><br><br>", unsafe_allow_html=True)
           # train kmeans model
           cluster_model = create_model(selected_model, num_clusters = selected_clusters)
 
@@ -207,123 +207,123 @@ def main():
               st.bar_chart(feat_imp.set_index('Feature'))
 
   elif page == "Anomaly Detection":
-    with col1:
-      st.header('Anomaly Detection')
+    #with col1:
+    st.header('Anomaly Detection')
 
-      st.write(
-        """
-        """
-      )
-
-      # import pycaret anomaly
-      from pycaret.anomaly import setup, create_model, assign_model, pull, plot_model
-      # import AnomalyExperiment
-      from pycaret.anomaly import AnomalyExperiment
-
-      # Display the list of CSV files
-      directory = "./"
-      all_files = os.listdir(directory)
-      # Filter files to only include CSV files
-      csv_files = [file for file in all_files if file.endswith(".csv")]
-      # Select a CSV file from the list
-      selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)
-
-      # Upload the CSV file
-      uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
-
-      # Define the unsupervised model
-      anomalymodel = ['abod', 'cluster', 'cof', 'iforest', 'histogram', 'knn', 'lof', 'svm', 'pca', 'mcd', 'sod', 'sos']
-      selected_model = st.selectbox("Choose an anomaly model", anomalymodel)
-
-      # Read and display the CSV file
-      if selected_csv != "None" or uploaded_file is not None:
-        if uploaded_file:
-          try:
-            delimiter = ','
-            insurance_claims = pd.read_csv(uploaded_file, sep=delimiter)
-          except ValueError:
-            delimiter = '|'
-            insurance_claims = pd.read_csv(uploaded_file, sep=delimiter, encoding='latin-1')
-        else:
-          insurance_claims = pd.read_csv(selected_csv)
-
-        num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
-        insurance_claims_reduced = insurance_claims.head(num_rows)
-        st.write("Rows to be processed: " + str(num_rows))
-
-        all_columns = insurance_claims_reduced.columns.tolist()
-        selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
-        insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
-
-        with st.expander("Inference Description", expanded=True):
-          insurance_claims_reduced.describe().T
-
-        with st.expander("Head Map", expanded=True):
-          cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
-          num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns
-
-          # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
-          # Calculate the correlation matrix
-          corr_matrix = insurance_claims_reduced[num_col].corr()
-          # Create a Matplotlib figure
-          fig, ax = plt.subplots(figsize=(12, 8))
-          # Create a heatmap using seaborn
-          #st.header("Heat Map")
-          sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
-          # Set the title for the heatmap
-          ax.set_title('Correlation Heatmap')
-          # Display the heatmap in Streamlit
-          st.pyplot(fig)
-
-        if st.button("Prediction"):
-
-          s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
-                    # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
-                    transformation=p_transformation,
-                    normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
-
-          exp_anomaly = AnomalyExperiment()
-          # init setup on exp
-          exp_anomaly.setup(insurance_claims_reduced, session_id = 123)
-
-          with st.spinner("Analyzing..."):
-            with col2:
-              st.markdown("<br><br><br><br>", unsafe_allow_html=True)
-            # train model
-            anomaly_model = create_model(selected_model)
-
-            with st.expander("Assign Model", expanded=False):
-              #st.header("Assign Model")
-              anomaly_model_2 = assign_model(anomaly_model)
-              anomaly_model_2
-
-            with st.expander("Anomaly Metrics", expanded=False):
-              #st.header("Anomaly Metrics")
-              anomaly_results = pull()
-              anomaly_results
-
-            with st.expander("Anomaly Plots", expanded=False):
-              if graph_select:
-                # plot
-                #st.header("Anomaly Plots")
-                plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
-                plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')
-
-            with st.expander("Feature Importance", expanded=False):
-              if graph_select and feat_imp_select:
-                # Create a Classification Model to extract feature importance
-                #st.header("Feature Importance")
-                from pycaret.classification import setup, create_model, get_config
-                s = setup(anomaly_model_2, target = 'Anomaly')
-                lr = create_model('lr')
-                # this is how you can recreate the table
-                feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
-                # sort by feature importance value and filter top 10
-                feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
-                # Display the filtered table in Streamlit
-                # st.dataframe(feat_imp)
-                # Display the filtered table as a bar chart in Streamlit
-                st.bar_chart(feat_imp.set_index('Feature'))
+
+    st.write(
+      """
+      """
+    )
+
+    # import pycaret anomaly
+    from pycaret.anomaly import setup, create_model, assign_model, pull, plot_model
+    # import AnomalyExperiment
+    from pycaret.anomaly import AnomalyExperiment
+
+    # Display the list of CSV files
+    directory = "./"
+    all_files = os.listdir(directory)
+    # Filter files to only include CSV files
+    csv_files = [file for file in all_files if file.endswith(".csv")]
+    # Select a CSV file from the list
+    selected_csv = st.selectbox("Select a CSV file from the list", ["None"] + csv_files)
+
+    # Upload the CSV file
+    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+
+    # Define the unsupervised model
+    anomalymodel = ['abod', 'cluster', 'cof', 'iforest', 'histogram', 'knn', 'lof', 'svm', 'pca', 'mcd', 'sod', 'sos']
+    selected_model = st.selectbox("Choose an anomaly model", anomalymodel)
+
+    # Read and display the CSV file
+    if selected_csv != "None" or uploaded_file is not None:
+      if uploaded_file:
+        try:
+          delimiter = ','
+          insurance_claims = pd.read_csv(uploaded_file, sep=delimiter)
+        except ValueError:
+          delimiter = '|'
+          insurance_claims = pd.read_csv(uploaded_file, sep=delimiter, encoding='latin-1')
+      else:
+        insurance_claims = pd.read_csv(selected_csv)
+
+      num_rows = int(insurance_claims.shape[0]*int(num_lines)/100)
+      insurance_claims_reduced = insurance_claims.head(num_rows)
+      st.write("Rows to be processed: " + str(num_rows))
+
+      all_columns = insurance_claims_reduced.columns.tolist()
+      selected_columns = st.multiselect("Choose columns", all_columns, default=all_columns)
+      insurance_claims_reduced = insurance_claims_reduced[selected_columns].copy()
+
+      with st.expander("Inference Description", expanded=True):
+        insurance_claims_reduced.describe().T
+
+      with st.expander("Head Map", expanded=True):
+        cat_col = insurance_claims_reduced.select_dtypes(include=['object']).columns
+        num_col = insurance_claims_reduced.select_dtypes(exclude=['object']).columns
+
+        # insurance_claims[num_col].hist(bins=15, figsize=(20, 15), layout=(5, 4))
+        # Calculate the correlation matrix
+        corr_matrix = insurance_claims_reduced[num_col].corr()
+        # Create a Matplotlib figure
+        fig, ax = plt.subplots(figsize=(12, 8))
+        # Create a heatmap using seaborn
+        #st.header("Heat Map")
+        sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
+        # Set the title for the heatmap
+        ax.set_title('Correlation Heatmap')
+        # Display the heatmap in Streamlit
+        st.pyplot(fig)
+
+      if st.button("Prediction"):
+
+        s = setup(insurance_claims_reduced, session_id = 123, remove_multicollinearity=p_remove_multicollinearity, multicollinearity_threshold=p_multicollinearity_threshold,
+                  # remove_outliers=p_remove_outliers, outliers_method=p_outliers_method,
+                  transformation=p_transformation,
+                  normalize=p_normalize, pca=p_pca, pca_method=p_pca_method)
+
+        exp_anomaly = AnomalyExperiment()
+        # init setup on exp
+        exp_anomaly.setup(insurance_claims_reduced, session_id = 123)
+
+        with st.spinner("Analyzing..."):
+          #with col2:
+          #st.markdown("<br><br><br><br>", unsafe_allow_html=True)
+          # train model
+          anomaly_model = create_model(selected_model)
+
+          with st.expander("Assign Model", expanded=False):
+            #st.header("Assign Model")
+            anomaly_model_2 = assign_model(anomaly_model)
+            anomaly_model_2
+
+          with st.expander("Anomaly Metrics", expanded=False):
+            #st.header("Anomaly Metrics")
+            anomaly_results = pull()
+            anomaly_results
+
+          with st.expander("Anomaly Plots", expanded=False):
+            if graph_select:
+              # plot
+              #st.header("Anomaly Plots")
+              plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
+              plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')
+
+          with st.expander("Feature Importance", expanded=False):
+            if graph_select and feat_imp_select:
+              # Create a Classification Model to extract feature importance
+              #st.header("Feature Importance")
+              from pycaret.classification import setup, create_model, get_config
+              s = setup(anomaly_model_2, target = 'Anomaly')
+              lr = create_model('lr')
+              # this is how you can recreate the table
+              feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
+              # sort by feature importance value and filter top 10
+              feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
+              # Display the filtered table in Streamlit
+              # st.dataframe(feat_imp)
+              # Display the filtered table as a bar chart in Streamlit
+              st.bar_chart(feat_imp.set_index('Feature'))
 try:
   main()
 except Exception as e:
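A note on the upload path in this diff: it parses with ',' and falls back to '|' when pd.read_csv raises ValueError. Streamlit's uploaded file is a stream, so after a failed first parse the read cursor sits at end-of-file and the retry would see an empty file. A minimal sketch of the same fallback with a rewind added; the helper name and the seek(0) calls are my additions, not part of this commit:

import pandas as pd

def read_claims_csv(uploaded_file):
    # Hypothetical helper mirroring app.py's ',' -> '|' fallback.
    # The seek(0) rewinds are an assumed fix: without them the retry
    # reads from end-of-file after the first attempt fails.
    try:
        uploaded_file.seek(0)
        return pd.read_csv(uploaded_file, sep=',')
    except ValueError:
        uploaded_file.seek(0)
        return pd.read_csv(uploaded_file, sep='|', encoding='latin-1')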
 
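For reference, the "Prediction" branch follows PyCaret's standard anomaly workflow: setup builds the preprocessing pipeline, create_model fits the chosen detector, assign_model appends 'Anomaly' and 'Anomaly_Score' columns, and pull fetches the last table PyCaret rendered. A self-contained sketch outside Streamlit; the synthetic DataFrame is illustrative only, not the app's CSV:

import numpy as np
import pandas as pd
from pycaret.anomaly import setup, create_model, assign_model, pull

# Synthetic stand-in for the user-selected insurance_claims data.
rng = np.random.default_rng(123)
data = pd.DataFrame(rng.normal(size=(200, 4)), columns=['a', 'b', 'c', 'd'])

s = setup(data, session_id=123)            # preprocessing pipeline, as in app.py
anomaly_model = create_model('iforest')    # any key from the app's model list
labeled = assign_model(anomaly_model)      # adds 'Anomaly' and 'Anomaly_Score'
print(labeled[['Anomaly', 'Anomaly_Score']].head())
print(pull())                              # the table behind the "Anomaly Metrics" expander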
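The "Feature Importance" expander uses a proxy: it refits a logistic regression with the anomaly flags as the target and ranks features by absolute coefficient. Coefficient magnitudes are only comparable across features when the inputs share a scale, which the app's classification setup call does not request; note too that the pycaret.classification import shadows the anomaly module's setup and create_model inside that branch. The same idea in plain scikit-learn, with synthetic data and labels standing in for assign_model's output:

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(0)
X = pd.DataFrame(rng.normal(size=(300, 4)), columns=['a', 'b', 'c', 'd'])
anomaly = (X['a'] > 1.5).astype(int)   # stand-in for the 'Anomaly' column

# Scale first so |coefficient| is comparable across features.
lr = LogisticRegression().fit(StandardScaler().fit_transform(X), anomaly)

feat_imp = (pd.DataFrame({'Feature': X.columns, 'Value': np.abs(lr.coef_[0])})
              .sort_values(by='Value', ascending=False)
              .head(10))
print(feat_imp)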