dperales committed on
Commit
39de30e
·
1 Parent(s): 1715696

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -1
app.py CHANGED
@@ -93,6 +93,7 @@ if page == "Clustering Analysis":
93
  else:
94
  insurance_claims = pd.read_csv(selected_csv)
95
 
 
96
  insurance_claims.describe().T
97
 
98
  cat_col = insurance_claims.select_dtypes(include=['object']).columns
@@ -104,6 +105,7 @@ if page == "Clustering Analysis":
104
  # Create a Matplotlib figure
105
  fig, ax = plt.subplots(figsize=(12, 8))
106
  # Create a heatmap using seaborn
 
107
  sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
108
  # Set the title for the heatmap
109
  ax.set_title('Correlation Heatmap')
@@ -133,15 +135,19 @@ if page == "Clustering Analysis":
133
  cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max',
134
  'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
135
  ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
 
136
  cluster_summary
 
137
  cluster_model_2
138
 
139
  # all_metrics = get_metrics()
140
  # all_metrics
141
 
 
142
  cluster_results = pull()
143
  cluster_results
144
 
 
145
  # plot pca cluster plot
146
  plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
147
 
@@ -160,6 +166,20 @@ if page == "Clustering Analysis":
160
  if selected_model != 'ap':
161
  plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  elif page == "Anomaly Detection":
164
  st.header('Anomaly Detection')
165
 
@@ -221,12 +241,29 @@ elif page == "Anomaly Detection":
221
  # train model
222
  anomaly_model = create_model(selected_model)
223
 
 
224
  anomaly_model_2 = assign_model(anomaly_model)
225
  anomaly_model_2
226
 
 
227
  anomaly_results = pull()
228
  anomaly_results
229
 
230
  # plot
 
231
  plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
232
- plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  else:
94
  insurance_claims = pd.read_csv(selected_csv)
95
 
96
+ st.header("Inference Description")
97
  insurance_claims.describe().T
98
 
99
  cat_col = insurance_claims.select_dtypes(include=['object']).columns
 
105
  # Create a Matplotlib figure
106
  fig, ax = plt.subplots(figsize=(12, 8))
107
  # Create a heatmap using seaborn
108
+ st.header("Heat Map")
109
  sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
110
  # Set the title for the heatmap
111
  ax.set_title('Correlation Heatmap')
 
135
  cluster_summary = cluster_model_2.groupby('Cluster').agg(['count', 'mean', 'median', 'min', 'max',
136
  'std', 'var', 'sum', ('quantile_25', lambda x: x.quantile(0.25)),
137
  ('quantile_75', lambda x: x.quantile(0.75)), 'skew'])
138
+ st.header("Cluster Summary")
139
  cluster_summary
140
+ st.header("Assign Model")
141
  cluster_model_2
142
 
143
  # all_metrics = get_metrics()
144
  # all_metrics
145
 
146
+ st.header("Clustering Metrics")
147
  cluster_results = pull()
148
  cluster_results
149
 
150
+ st.header("Clustering Plots")
151
  # plot pca cluster plot
152
  plot_model(cluster_model, plot = 'cluster', display_format = 'streamlit')
153
 
 
166
  if selected_model != 'ap':
167
  plot_model(cluster_model, plot = 'distribution', display_format = 'streamlit')
168
 
169
+ # Create a Classification Model to extract feature importance
170
+ st.header("Feature Importance")
171
+ from pycaret.classification import *
172
+ s = setup(cluster_model_2, target = 'Cluster')
173
+ lr = create_model('lr')
174
+ # Build a Feature/Value table from the absolute coefficients in lr.coef_[0]
175
+ feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
176
+ # sort by feature importance value and filter top 10
177
+ feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
178
+ # Display the filtered table in Streamlit
179
+ # st.dataframe(feat_imp)
180
+ # Display the filtered table as a bar chart in Streamlit
181
+ st.bar_chart(feat_imp.set_index('Feature'))
182
+
183
  elif page == "Anomaly Detection":
184
  st.header('Anomaly Detection')
185
 
 
241
  # train model
242
  anomaly_model = create_model(selected_model)
243
 
244
+ st.header("Assign Model")
245
  anomaly_model_2 = assign_model(anomaly_model)
246
  anomaly_model_2
247
 
248
+ st.header("Anomaly Metrics")
249
  anomaly_results = pull()
250
  anomaly_results
251
 
252
  # plot
253
+ st.header("Anomaly Plots")
254
  plot_model(anomaly_model, plot = 'tsne', display_format = 'streamlit')
255
+ plot_model(anomaly_model, plot = 'umap', display_format = 'streamlit')
256
+
257
+ # Create a Classification Model to extract feature importance
258
+ st.header("Feature Importance")
259
+ from pycaret.classification import *
260
+ s = setup(anomaly_model_2, target = 'Anomaly')
261
+ lr = create_model('lr')
262
+ # Build a Feature/Value table from the absolute coefficients in lr.coef_[0]
263
+ feat_imp = pd.DataFrame({'Feature': get_config('X_train').columns, 'Value' : abs(lr.coef_[0])}).sort_values(by='Value', ascending=False)
264
+ # sort by feature importance value and filter top 10
265
+ feat_imp = feat_imp.sort_values(by='Value', ascending=False).head(10)
266
+ # Display the filtered table in Streamlit
267
+ # st.dataframe(feat_imp)
268
+ # Display the filtered table as a bar chart in Streamlit
269
+ st.bar_chart(feat_imp.set_index('Feature'))