Update app.py
Browse files
app.py
CHANGED
@@ -141,9 +141,10 @@ def create_component_for_analysis_for_single_df(selected_files, dfs, i):
|
|
141 |
for action in analysis_actions:
|
142 |
|
143 |
if action == 'Sample Data':
|
|
|
144 |
st.write(df_for_analysis.sample(10))
|
145 |
elif action == 'Get Profile':
|
146 |
-
|
147 |
full_data_check = st.checkbox("Report on all columns",key="filter_data_check"+str(i))
|
148 |
|
149 |
if full_data_check:
|
@@ -160,12 +161,11 @@ def create_component_for_analysis_for_single_df(selected_files, dfs, i):
|
|
160 |
st_profile_report(pr)
|
161 |
|
162 |
elif action == 'Summary of Data':
|
|
|
163 |
st.write(df_for_analysis.describe())
|
164 |
-
|
165 |
-
# key=action + "_col_filter_" + str(i))
|
166 |
-
# selected_filter_vals = st.multiselect("Select Values to Filter on ", df[col_to_filter].unique(),
|
167 |
-
# key=action + "_col_filter_val_" + str(i))
|
168 |
elif action == 'Univariate Analysis':
|
|
|
169 |
cols_for_analysis = st.multiselect("Select Columns for Univariate Analysis",options= df_for_analysis.columns.values)
|
170 |
for col in cols_for_analysis:
|
171 |
if str(df_for_analysis[col].dtype) in ['int64','float64'] and df_for_analysis[col].nunique() > 10 :
|
@@ -194,14 +194,23 @@ def create_component_for_analysis_for_single_df(selected_files, dfs, i):
|
|
194 |
# fig.update_layout(height=200, width=400, title_text=f"{col} data distribution")
|
195 |
|
196 |
st.plotly_chart(fig, use_container_width=True)
|
197 |
-
|
198 |
elif action == "Bivariate Analysis":
|
|
|
199 |
create_for_bivariate_analysis(selected_files, df, i)
|
200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
201 |
def create_for_bivariate_analysis(selected_files, df, i):
|
202 |
-
|
203 |
-
# st.subheader("Visualisation Type")
|
204 |
-
viz_type = st.radio("What type of Visualisation?",("Cross Tab",'Box Plot'))
|
205 |
target_column = st.selectbox("Select the target column ", df.columns.values,
|
206 |
key= "bivariate_target_column_" + str(i))
|
207 |
bivariate_columns = st.multiselect("Select the columns to analyse ", df.columns.values,
|
@@ -209,10 +218,11 @@ def create_for_bivariate_analysis(selected_files, df, i):
|
|
209 |
|
210 |
col_vals = []
|
211 |
|
212 |
-
if
|
213 |
for col in bivariate_columns:
|
214 |
col_vals.append(df[col])
|
215 |
-
|
|
|
216 |
if len(col_vals) > 3 :
|
217 |
st.warning("Too many columns to split on. Please consider reducing the no of columns")
|
218 |
crosstab_df = pd.crosstab(df[target_column], col_vals, margins=True)
|
@@ -246,52 +256,53 @@ def create_component_for_data_analysis():
|
|
246 |
st.write("Upload a file to start analysis")
|
247 |
|
248 |
|
249 |
-
|
250 |
-
st.title("Model Results Analyzer")
|
251 |
-
with st.sidebar:
|
252 |
|
|
|
|
|
253 |
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
|
265 |
-
if selected_menu == "Home":
|
266 |
-
|
267 |
|
268 |
-
elif selected_menu == "Upload Data":
|
269 |
|
270 |
-
|
271 |
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
|
276 |
-
elif selected_menu == "Analyze Data":
|
277 |
-
|
278 |
|
279 |
-
elif selected_menu == "Add Features":
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
|
141 |
for action in analysis_actions:
|
142 |
|
143 |
if action == 'Sample Data':
|
144 |
+
clear_chart_type_session_var()
|
145 |
st.write(df_for_analysis.sample(10))
|
146 |
elif action == 'Get Profile':
|
147 |
+
clear_chart_type_session_var()
|
148 |
full_data_check = st.checkbox("Report on all columns",key="filter_data_check"+str(i))
|
149 |
|
150 |
if full_data_check:
|
|
|
161 |
st_profile_report(pr)
|
162 |
|
163 |
elif action == 'Summary of Data':
|
164 |
+
clear_chart_type_session_var()
|
165 |
st.write(df_for_analysis.describe())
|
166 |
+
|
|
|
|
|
|
|
167 |
elif action == 'Univariate Analysis':
|
168 |
+
clear_chart_type_session_var()
|
169 |
cols_for_analysis = st.multiselect("Select Columns for Univariate Analysis",options= df_for_analysis.columns.values)
|
170 |
for col in cols_for_analysis:
|
171 |
if str(df_for_analysis[col].dtype) in ['int64','float64'] and df_for_analysis[col].nunique() > 10 :
|
|
|
194 |
# fig.update_layout(height=200, width=400, title_text=f"{col} data distribution")
|
195 |
|
196 |
st.plotly_chart(fig, use_container_width=True)
|
|
|
197 |
elif action == "Bivariate Analysis":
|
198 |
+
add_chart_options_to_sidebar()
|
199 |
create_for_bivariate_analysis(selected_files, df, i)
|
200 |
|
201 |
+
def clear_chart_type_session_var():
    """Forget the chart type remembered in Streamlit's session state.

    Removes the ``'chart_type'`` entry from ``st.session_state`` when it is
    present and does nothing otherwise.
    """
    if 'chart_type' in st.session_state:
        # The key is the string 'chart_type'. Indexing with a bare name
        # ``chart_type`` (as before) refers to a variable that this function
        # never defines, so the entry was never actually removed.
        del st.session_state['chart_type']
|
204 |
+
|
205 |
+
def add_chart_options_to_sidebar():
    """Offer a "Graph Type" radio in the sidebar and remember the choice.

    The radio is rendered only while ``st.session_state`` has no
    ``'chart_type'`` entry. Picking any option other than ``'None'`` stores
    that option under ``'chart_type'``.
    """
    if 'chart_type' in st.session_state:
        # A chart type is already remembered; nothing to ask.
        return

    with st.sidebar:
        viz_type = st.radio("Graph Type", ('None', 'Cross Tab', 'Pivot Table', 'Box Plot'))

    if viz_type and viz_type != 'None':
        # Assignment, not comparison: the previous ``==`` only looked up the
        # (missing) key and never stored the selection.
        st.session_state['chart_type'] = viz_type
|
211 |
+
|
212 |
def create_for_bivariate_analysis(selected_files, df, i):
|
213 |
+
|
|
|
|
|
214 |
target_column = st.selectbox("Select the target column ", df.columns.values,
|
215 |
key= "bivariate_target_column_" + str(i))
|
216 |
bivariate_columns = st.multiselect("Select the columns to analyse ", df.columns.values,
|
|
|
218 |
|
219 |
col_vals = []
|
220 |
|
221 |
+
if bivariate_columns:
|
222 |
for col in bivariate_columns:
|
223 |
col_vals.append(df[col])
|
224 |
+
|
225 |
+
if st.session_state['chart_type'] == 'Cross Tab':
|
226 |
if len(col_vals) > 3 :
|
227 |
st.warning("Too many columns to split on. Please consider reducing the no of columns")
|
228 |
crosstab_df = pd.crosstab(df[target_column], col_vals, margins=True)
|
|
|
256 |
st.write("Upload a file to start analysis")
|
257 |
|
258 |
|
259 |
+
def main():
    """Render the "Model Results Analyzer" page and dispatch on the sidebar menu.

    Draws the page title and a vertical sidebar menu, then renders the section
    that matches the selected entry:

    * "Home" - a short description of the app.
    * "Upload Data" - the upload component plus a table of the file names
      stored under ``st.session_state['data_files']``.
    * "Analyze Data" - the data-analysis component.
    * "Add Features" - lets the user type an ``add_feature(row)`` function and
      applies it row-wise to the selected data frame to create a new column.
    """
    st.title("Model Results Analyzer")

    with st.sidebar:
        selected_menu = option_menu(
            None, ["Home", "Upload Data", "Add Features", "Analyze Data"],
            icons=['house', 'cloud-upload', "list-task", 'gear'],
            menu_icon="cast", default_index=0, orientation="vertical",
            styles={
                "container": {"padding": "0!important", "background-color": "#fafafa"},
                "icon": {"color": "orange", "font-size": "15px"},
                "nav-link": {"font-size": "15px", "text-align": "left", "margin": "0px",
                             "--hover-color": "#eee"},
                "nav-link-selected": {"background-color": "green"},
            })

    if selected_menu == "Home":
        st.markdown('**This is to analyse models performance.**')

    elif selected_menu == "Upload Data":
        create_upload_file_component()

        if 'data_files' in st.session_state:
            # Show the names of the files uploaded so far.
            st.write(pd.DataFrame(
                data={"File Name": pd.DataFrame.from_dict(st.session_state['data_files'], orient='index').index}))

    elif selected_menu == "Analyze Data":
        create_component_for_data_analysis()

    elif selected_menu == "Add Features":
        if 'data_files' in st.session_state:
            selected_file = st.selectbox("Select the File(S) to analyze", st.session_state['data_files'].keys())

            if selected_file:
                df = st.session_state['data_frames'][selected_file]
                st.header("Enter the function definiton to create a new feature")
                feature_name = st.text_input("Enter the New Feature Name")
                st.warning("please retain the function signature as 'add_feature(row)'")

                content = st_ace(language="python", value="def add_feature(row):")

                # Only act once the editor content differs from the bare stub.
                if content != 'def add_feature(row):':
                    # SECURITY: this executes arbitrary code typed into the
                    # editor. Only expose this page to trusted users.
                    #
                    # exec() inside a function cannot create a name that later
                    # statements of that function can see, so the code is run
                    # in an explicit namespace and the function is fetched from
                    # it. The namespace starts as a copy of the module globals
                    # so the user's code can use the module's imports.
                    namespace = dict(globals())
                    namespace.pop('add_feature', None)
                    exec(content, namespace)
                    add_feature = namespace.get('add_feature')

                    if not callable(add_feature):
                        st.error("The code must define a function named 'add_feature(row)'")
                    elif not feature_name:
                        # Avoid silently creating a column with an empty name.
                        st.warning("Please enter a name for the new feature")
                    else:
                        df[feature_name] = df.apply(add_feature, axis=1)

                st.session_state['data_frames'][selected_file] = df
                st.write(df.columns.values)
|
307 |
+
|
308 |
+
# Script entry point: build the page by calling main() when this file runs.
main()
|