Update app.py
Browse files
@@ -141,9 +141,10 @@ def create_component_for_analysis_for_single_df(selected_files, dfs, i):
141 |
for action in analysis_actions:
142 |
143 |
if action == 'Sample Data':
144 |
145 |
elif action == 'Get Profile':
146 |
147 |
full_data_check = st.checkbox("Report on all columns",key="filter_data_check"+str(i))
148 |
149 |
if full_data_check:
@@ -160,12 +161,11 @@ def create_component_for_analysis_for_single_df(selected_files, dfs, i):
160 |
161 |
162 |
elif action == 'Summary of Data':
163 |
164 |
165 |
# key=action + "_col_filter_" + str(i))
166 |
# selected_filter_vals = st.multiselect("Select Values to Filter on ", df[col_to_filter].unique(),
167 |
# key=action + "_col_filter_val_" + str(i))
168 |
elif action == 'Univariate Analysis':
169 |
cols_for_analysis = st.multiselect("Select Columns for Univariate Analysis",options= df_for_analysis.columns.values)
170 |
for col in cols_for_analysis:
171 |
if str(df_for_analysis[col].dtype) in ['int64','float64'] and df_for_analysis[col].nunique() > 10 :
@@ -194,14 +194,23 @@ def create_component_for_analysis_for_single_df(selected_files, dfs, i):
194 |
# fig.update_layout(height=200, width=400, title_text=f"{col} data distribution")
195 |
196 |
st.plotly_chart(fig, use_container_width=True)
197 |
198 |
elif action == "Bivariate Analysis":
199 |
create_for_bivariate_analysis(selected_files, df, i)
200 |
201 |
def create_for_bivariate_analysis(selected_files, df, i):
202 |
203 |
# st.subheader("Visualisation Type")
204 |
viz_type = st.radio("What type of Visualisation?",("Cross Tab",'Box Plot'))
205 |
target_column = st.selectbox("Select the target column ", df.columns.values,
206 |
key= "bivariate_target_column_" + str(i))
207 |
bivariate_columns = st.multiselect("Select the columns to analyse ", df.columns.values,
@@ -209,10 +218,11 @@ def create_for_bivariate_analysis(selected_files, df, i):
209 |
210 |
col_vals = []
211 |
212 |
213 |
for col in bivariate_columns:
214 |
215 |
216 |
if len(col_vals) > 3 :
217 |
st.warning("Too many columns to split on. Please consider reducing the no of columns")
218 |
crosstab_df = pd.crosstab(df[target_column], col_vals, margins=True)
@@ -246,52 +256,53 @@ def create_component_for_data_analysis():
246 |
st.write("Upload a file to start analysis")
247 |
248 |
249 |
250 |
st.title("Model Results Analyzer")
251 |
with st.sidebar:
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
if selected_menu == "Home":
266 |
267 |
268 |
elif selected_menu == "Upload Data":
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
elif selected_menu == "Analyze Data":
277 |
278 |
279 |
elif selected_menu == "Add Features":
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
141 |
for action in analysis_actions:
142 |
143 |
if action == 'Sample Data':
144 |
145 |
146 |
elif action == 'Get Profile':
147 |
148 |
full_data_check = st.checkbox("Report on all columns",key="filter_data_check"+str(i))
149 |
150 |
if full_data_check:
161 |
162 |
163 |
elif action == 'Summary of Data':
164 |
165 |
166 |
167 |
elif action == 'Univariate Analysis':
168 |
169 |
cols_for_analysis = st.multiselect("Select Columns for Univariate Analysis",options= df_for_analysis.columns.values)
170 |
for col in cols_for_analysis:
171 |
if str(df_for_analysis[col].dtype) in ['int64','float64'] and df_for_analysis[col].nunique() > 10 :
194 |
# fig.update_layout(height=200, width=400, title_text=f"{col} data distribution")
195 |
196 |
st.plotly_chart(fig, use_container_width=True)
197 |
elif action == "Bivariate Analysis":
198 |
199 |
create_for_bivariate_analysis(selected_files, df, i)
200 |
201 |
def clear_chart_type_session_var():
    """Remove the stored chart type from the Streamlit session state.

    Does nothing when no ``'chart_type'`` entry is present.
    """
    if 'chart_type' in st.session_state:
        # The key must be the string literal; the bare name ``chart_type``
        # is not defined anywhere in this function and raised NameError.
        del st.session_state['chart_type']
204 |
205 |
def add_chart_options_to_sidebar():
    """Show a graph-type selector in the sidebar and remember the choice.

    The radio widget is rendered only while ``'chart_type'`` is absent from
    ``st.session_state``. When the user picks anything other than ``'None'``,
    the selection is stored under ``st.session_state['chart_type']``.
    """
    if 'chart_type' not in st.session_state:
        with st.sidebar:
            viz_type = st.radio("Graph Type", ('None', 'Cross Tab', 'Pivot Table', 'Box Plot'))
            if viz_type and viz_type != 'None':
                # Assignment, not comparison: the previous ``==`` discarded
                # its result and raised KeyError on the missing key.
                st.session_state['chart_type'] = viz_type
211 |
212 |
def create_for_bivariate_analysis(selected_files, df, i):
213 |
214 |
target_column = st.selectbox("Select the target column ", df.columns.values,
215 |
key= "bivariate_target_column_" + str(i))
216 |
bivariate_columns = st.multiselect("Select the columns to analyse ", df.columns.values,
218 |
219 |
col_vals = []
220 |
221 |
if bivariate_columns:
222 |
for col in bivariate_columns:
223 |
224 |
225 |
if st.session_state['chart_type'] == 'Cross Tab':
226 |
if len(col_vals) > 3 :
227 |
st.warning("Too many columns to split on. Please consider reducing the no of columns")
228 |
crosstab_df = pd.crosstab(df[target_column], col_vals, margins=True)
256 |
st.write("Upload a file to start analysis")
257 |
258 |
259 |
def main():
260 |
261 |
st.title("Model Results Analyzer")
262 |
with st.sidebar:
263 |
264 |
selected_menu = option_menu(None, ["Home", "Upload Data", "Add Features","Analyze Data"],
265 |
icons=['house', 'cloud-upload', "list-task", 'gear'],
266 |
menu_icon="cast", default_index=0, orientation="vertical",
267 |
268 |
"container": {"padding": "0!important", "background-color": "#fafafa"},
269 |
"icon": {"color": "orange", "font-size": "15px"},
270 |
"nav-link": {"font-size": "15px", "text-align": "left", "margin": "0px",
271 |
"--hover-color": "#eee"},
272 |
"nav-link-selected": {"background-color": "green"},
273 |
274 |
275 |
if selected_menu == "Home":
276 |
st.markdown('**This is to analyse models performance.**')
277 |
278 |
elif selected_menu == "Upload Data":
279 |
280 |
281 |
282 |
if 'data_files' in st.session_state:
283 |
284 |
data={"File Name": pd.DataFrame.from_dict(st.session_state['data_files'], orient='index').index}))
285 |
286 |
elif selected_menu == "Analyze Data":
287 |
288 |
289 |
elif selected_menu == "Add Features":
290 |
if 'data_files' in st.session_state:
291 |
selected_file = st.selectbox("Select the File(S) to analyze", st.session_state['data_files'].keys())
292 |
293 |
if selected_file:
294 |
df = st.session_state['data_frames'][selected_file]
295 |
st.header("Enter the function definiton to create a new feature")
296 |
feature_name = st.text_input("Enter the New Feature Name")
297 |
st.warning("please retain the function signature as 'add_feature(row)'")
298 |
299 |
content = st_ace(language="python",value="def add_feature(row):")
300 |
301 |
if content != 'def add_feature(row):':
302 |
303 |
df[feature_name] = df.apply(lambda x:add_feature(x),axis=1)
304 |
305 |
st.session_state['data_frames'][selected_file] = df
306 |
307 |
308 |