import streamlit as st |
import os |
from streamlit_option_menu import option_menu |
import pandas as pd |
import plotly.express as px |
from plotly.subplots import make_subplots |
import plotly.graph_objects as go |
from streamlit_ace import st_ace |
from streamlit_pandas_profiling import st_profile_report |
def set_data_files_session_object(file_name, file_path):
    """Record ``file_path`` under ``file_name`` in the session's ``data_files`` map.

    The map lives in ``st.session_state['data_files']``. It is started as an
    empty dict the first time this is called, and is written back to the
    session state after the entry has been added or overwritten.
    """
    known_files = st.session_state['data_files'] if 'data_files' in st.session_state else {}
    known_files[file_name] = file_path
    st.session_state['data_files'] = known_files
def set_filtered_data_session_object(df, file_name):
    """Store the filtered frame ``df`` for ``file_name`` in the session state.

    Entries are kept in the dict at ``st.session_state['filtered_data']``,
    which is created on first use. An existing entry for the same
    ``file_name`` is replaced.
    """
    filtered_by_file = st.session_state['filtered_data'] if 'filtered_data' in st.session_state else {}
    filtered_by_file[file_name] = df
    st.session_state['filtered_data'] = filtered_by_file
def set_dataframe_session_object(file_name, file_path):
    """Load the file at ``file_path`` and cache the frame under ``file_name``.

    The frame is stored in the dict at ``st.session_state['data_frames']``
    (created on first use); an existing entry for ``file_name`` is replaced.

    Args:
        file_name: Key under which the loaded DataFrame is stored.
        file_path: Path of the file to read.

    Raises:
        Whatever the pandas reader raises when the file cannot be parsed.
    """
    # The upload widget in this file also accepts Excel workbooks, which
    # ``read_csv`` cannot parse, so choose the reader from the extension.
    extension = os.path.splitext(file_path)[1].lower()
    if extension in ('.xls', '.xlsx'):
        frame = pd.read_excel(file_path)
    else:
        # Any other extension keeps the previous behaviour: it is handed to
        # ``read_csv`` unchanged.
        frame = pd.read_csv(file_path)

    frames_by_file = st.session_state['data_frames'] if 'data_frames' in st.session_state else {}
    frames_by_file[file_name] = frame
    st.session_state['data_frames'] = frames_by_file
def save_file(file_object):
    """Write an uploaded file into ``uploaded_files/`` and register it in the session.

    The file is saved below ``<cwd>/uploaded_files`` and then recorded through
    ``set_data_files_session_object`` (its path) and
    ``set_dataframe_session_object`` (its parsed contents).

    Args:
        file_object: Uploaded file exposing ``name`` and ``getbuffer()``.
    """
    # The name comes from the client; keep only its final path component so a
    # name such as "../../x.csv" cannot be written outside the upload folder.
    safe_name = os.path.basename(file_object.name)

    # Create the folder here as well so this function does not depend on the
    # caller having prepared it.
    upload_dir = os.path.join(os.getcwd(), "uploaded_files")
    os.makedirs(upload_dir, exist_ok=True)

    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        f.write(file_object.getbuffer())

    set_data_files_session_object(safe_name, file_path)
    set_dataframe_session_object(safe_name, file_path)
def create_upload_file_component():
    """Render the file uploader and save every file the user has provided.

    When at least one file is present, the ``uploaded_files`` folder under the
    current working directory is created if needed and each upload is handed
    to ``save_file``.
    """
    accepted_types = ['csv', 'xls', 'xlsx', 'pkl', 'pdf']
    uploads = st.file_uploader("Upload one file at a time.", type=accepted_types,
                               accept_multiple_files=True)
    if not uploads:
        return

    target_dir = os.path.join(os.getcwd(), "uploaded_files")
    os.makedirs(target_dir, mode=0o777, exist_ok=True)
    for upload in uploads:
        save_file(upload)
def create_component_to_add_target_func(selected_files, dfs, i):
    """Render the target-variable input and run a placeholder generated function.

    Args:
        selected_files: Not used by the current implementation.
        dfs: Not used by the current implementation.
        i: Index used to build a unique widget key.
    """
    # The entered name is not consumed yet; the widget is still rendered so
    # the UI stays the same.
    st.text_input("Name of the target variable", key="target_var" + str(i))

    code = "def f1(x): return str(x * 3)"
    # ``exec`` inside a function cannot create a local that the compiled body
    # can see (a bare ``f1(3)`` would be looked up as a global and raise
    # NameError), so run the code in an explicit namespace and fetch ``f1``.
    namespace = {}
    exec(code, namespace)
    st.write(namespace["f1"](3))
def _coerce_filter_value(column, raw_value):
    """Turn the text typed by the user into a value comparable with ``column``.

    For a numeric column the text is parsed with ``float`` and ``None`` is
    returned when it does not parse. For any other dtype the text is returned
    unchanged.
    """
    if not pd.api.types.is_numeric_dtype(column):
        return raw_value
    try:
        return float(raw_value)
    except ValueError:
        return None


def _render_filter_controls(df, i):
    """Render the filter widgets for panel ``i`` and apply the chosen filter.

    Returns the filtered DataFrame, or ``None`` when no usable filter value
    has been supplied yet.
    """
    action = "data_filter"
    col_to_filter = st.selectbox("Select the field to Filter on ", df.columns.values,
                                 key=action + "_col_filter_" + str(i))
    filter_operation = st.selectbox("Operation ",
                                    ['Greater Than', 'Equals', 'Less Than', "In", "In Between"],
                                    key=action + "_col_filter_op_" + str(i))
    if col_to_filter is None or not filter_operation:
        return None

    value_key = action + "_col_filter_val_" + str(i)
    column = df[col_to_filter]

    if filter_operation == 'In':
        chosen = st.multiselect("Select Values to Filter on ", column.unique(), key=value_key)
        return df[column.isin(chosen)] if chosen else None

    # Operation label -> name of the Series comparison method implementing it.
    comparison_methods = {'Greater Than': 'gt', 'Equals': 'eq', 'Less Than': 'lt'}
    if filter_operation in comparison_methods:
        raw_value = st.text_input("Enter a numeric value", key=value_key)
        if not raw_value:
            return None
        # text_input always yields a string; comparing a numeric column with
        # a string either raises or never matches, so convert it first.
        value = _coerce_filter_value(column, raw_value)
        if value is None:
            st.error(f"'{raw_value}' is not a number; the filter was not applied.")
            return None
        mask = getattr(column, comparison_methods[filter_operation])(value)
        return df[mask]

    if filter_operation == 'In Between':
        options = sorted(column.dropna().unique())
        if len(options) < 2:
            # Fewer than two distinct values: there is no range to choose.
            return None
        # Passing a two-element ``value`` makes select_slider a range slider
        # that returns a (low, high) pair.
        low, high = st.select_slider("Select range", options=options,
                                     value=(options[0], options[-1]), key=value_key)
        return df[column.between(low, high)]

    return None


def _render_univariate_chart(df_for_analysis, col):
    """Plot one column: a scatter for many-valued numerics, else a distribution."""
    series = df_for_analysis[col]
    if pd.api.types.is_numeric_dtype(series) and series.nunique() > 10:
        fig = px.scatter(x=df_for_analysis.index, y=series, labels=dict(x="Index", y=col))
        st.plotly_chart(fig, use_container_width=True)
        return

    # Bar of counts plus a line of shares for the 20 most frequent values.
    counts = series.value_counts()
    top_counts = counts.head(20)
    # Same figures as value_counts(normalize=True): share of all counted values.
    top_shares = top_counts / counts.sum()
    trace1 = go.Bar(x=top_counts.index, y=top_counts.values, name='Count',
                    marker=dict(color='rgb(34,163,192)'))
    trace2 = go.Scatter(x=top_counts.index, y=top_shares.values, name='% Distribution',
                        yaxis='y2')
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.add_trace(trace1)
    fig.add_trace(trace2, secondary_y=True)
    fig.update_layout(height=600, width=800, title=f"{col} data distribution",
                      xaxis=dict(tickangle=-90))
    st.plotly_chart(fig, use_container_width=True)


def create_component_for_analysis_for_single_df(selected_files, dfs, i):
    """Render the analysis panel for the ``i``-th selected file.

    The panel shows an optional filter section followed by a multiselect of
    analyses (summary, sample, profile report, univariate charts and a
    placeholder "Add a Target Column" action). When a filter yields a frame,
    that frame is stored through ``set_filtered_data_session_object`` and used
    for the analyses; otherwise the unfiltered frame is used.

    Args:
        selected_files: Sequence of selected file names.
        dfs: Mapping from file name to its DataFrame.
        i: Index into ``selected_files``; also used to build widget keys.
    """
    file_name = selected_files[i]
    st.subheader(file_name)
    df = dfs[file_name]

    # Default to the full frame so a ticked checkbox with no filter value yet
    # neither fails on a missing session entry nor shows a stale result.
    df_for_analysis = df
    filter_data = st.checkbox("Analyse on Filtered Data", key="filter_data_check" + str(i))
    if filter_data:
        filtered_df = _render_filter_controls(df, i)
        if filtered_df is not None:
            set_filtered_data_session_object(filtered_df, file_name)
            df_for_analysis = filtered_df

    analysis_actions = st.multiselect("What analysis do you wish to do?",
                                      ['Summary of Data', 'Sample Data', 'Get Profile',
                                       'Univariate Analysis', 'Bivariate Analysis',
                                       'Add a Target Column'],
                                      key='analysis_action_' + str(i))
    for action in analysis_actions:
        if action == 'Sample Data':
            # sample(10) raises when the frame has fewer than 10 rows.
            sample_size = min(10, len(df_for_analysis))
            st.write(df_for_analysis.sample(sample_size) if sample_size else df_for_analysis)
        elif action == 'Get Profile':
            pr = df_for_analysis.profile_report()
            st_profile_report(pr)
        elif action == 'Summary of Data':
            st.write(df_for_analysis.describe())
        elif action == 'Univariate Analysis':
            # A per-panel key keeps identical widgets in side-by-side panels
            # from colliding.
            cols_for_analysis = st.multiselect("Select Columns for Univariate Analysis",
                                               options=df_for_analysis.columns.values,
                                               key='univariate_cols_' + str(i))
            for col in cols_for_analysis:
                _render_univariate_chart(df_for_analysis, col)
        elif action == "Add a Target Column":
            code = "def f1(x): return str(x * 3)"
            # exec() inside a function cannot define a name the compiled body
            # can see, so run it in an explicit namespace and look ``f1`` up.
            namespace = {}
            exec(code, namespace)
            st.write(namespace["f1"](3))
def create_component_for_data_analysis():
    """Let the user pick uploaded files and render one analysis panel per file.

    Without any registered files a hint to upload one is shown. Otherwise one
    column is created per selected file; a frame missing from
    ``st.session_state['data_frames']`` is read from its recorded path with
    ``pd.read_csv`` and cached there before the panels are rendered.
    """
    if 'data_files' not in st.session_state:
        st.write("Upload a file to start analysis")
        return

    selected_files = st.multiselect("Select the File(S) to analyze", st.session_state['data_files'].keys())
    if not selected_files:
        return

    panels = st.columns(len(selected_files))
    frame_cache = st.session_state['data_frames']
    dfs = {}
    for name in selected_files:
        if name not in frame_cache:
            frame_cache[name] = pd.read_csv(st.session_state['data_files'][name])
        dfs[name] = frame_cache[name]

    for position, panel in enumerate(panels):
        with panel:
            create_component_for_analysis_for_single_df(selected_files, dfs, position)
# ---- Page layout: title, sidebar navigation, then the selected page ----
st.title("Model Results Analyzer")
with st.sidebar:
    selected_menu = option_menu(None, ["Home", "Upload Data", "Add Features", "Analyze Data"],
                                icons=['house', 'cloud-upload', "list-task", 'gear'],
                                menu_icon="cast", default_index=0, orientation="vertical",
                                styles={
                                    "container": {"padding": "0!important", "background-color": "#fafafa"},
                                    "icon": {"color": "orange", "font-size": "15px"},
                                    "nav-link": {"font-size": "15px", "text-align": "left", "margin": "0px",
                                                 "--hover-color": "#eee"},
                                    "nav-link-selected": {"background-color": "green"},
                                })

if selected_menu == "Home":
    st.markdown('**This is to analyse models performance.**')
elif selected_menu == "Upload Data":
    create_upload_file_component()
    if 'data_files' in st.session_state:
        # List the names of every file registered so far.
        st.write(pd.DataFrame(data={"File Name": list(st.session_state['data_files'])}))
elif selected_menu == "Analyze Data":
    create_component_for_data_analysis()
elif selected_menu == "Add Features":
    if 'data_files' in st.session_state:
        selected_file = st.selectbox("Select the File(S) to analyze", st.session_state['data_files'].keys())
        if selected_file:
            df = st.session_state['data_frames'][selected_file]
            st.header("Enter the function definiton to create a new feature")
            feature_name = st.text_input("Enter the New Feature Name")
            st.warning("please retain the function signature as 'add_feature(row)'")
            content = st_ace(language="python", value="def add_feature(row):")
            # Only act once the editor holds more than the untouched template.
            if content and content.strip() != 'def add_feature(row):':
                if not feature_name.strip():
                    # Without this guard the new column would be named ''.
                    st.error("Enter a name for the new feature before submitting the function.")
                else:
                    # SECURITY: this executes code typed by the user with the
                    # privileges of the app process. Only expose this page to
                    # trusted users.
                    # Run in a copy of the module namespace so the code sees
                    # the same names as before without redefining them here.
                    namespace = dict(globals())
                    namespace.pop('add_feature', None)
                    try:
                        exec(content, namespace)
                        feature_func = namespace.get('add_feature')
                        if not callable(feature_func):
                            st.error("The code must define a function named 'add_feature(row)'.")
                        else:
                            df[feature_name] = df.apply(feature_func, axis=1)
                            st.session_state['data_frames'][selected_file] = df
                    except Exception as err:
                        # User code can fail in any way (syntax error while
                        # typing, runtime error per row); report it on the
                        # page rather than crashing the script run.
                        st.error(f"Could not create the feature: {err}")
            st.write(df.columns.values)