eudataexplorer / pages /2_Analyses.py
huathedev's picture
add files
24fa32b
import os
import sys
sys.path.insert(1, os.path.abspath(".."))
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import streamlit as st
from statsmodels.stats.descriptivestats import describe
from stat_mod import *
# ---------------------------------------------------------------------------
# Page setup: plot styling, Streamlit page configuration, data load and tabs.
# ---------------------------------------------------------------------------

# Set style of plots.
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# newer releases no longer ship the legacy names, so pick whichever variant
# this installation actually provides. If neither is present the default
# Matplotlib style is kept rather than crashing the page over cosmetics.
_WHITEGRID_STYLES = ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
_plot_style = next(
    (name for name in _WHITEGRID_STYLES if name in plt.style.available),
    None,
)
if _plot_style is not None:
    plt.style.use(_plot_style)

# Set plot resolution
mpl.rcParams["figure.dpi"] = 300

# Configure Streamlit page properties (this is the first Streamlit call on
# the page; keep it ahead of every other `st.*` command).
st.set_page_config(page_title="Analyses📊", page_icon="📊")

# Page title and introductory text.
# `analysis_text` is not defined in this file; presumably it is supplied by
# the star import from `stat_mod` - verify there.
st.title("Statistical Analyses📊")
st.markdown(analysis_text, unsafe_allow_html=True)

# Read data CSV file (path is relative to the app's working directory).
df = pd.read_csv("data/eu_region_data.csv")
# Every column from the fourth onward is offered in the variable pickers.
# NOTE(review): assumes the first three columns are non-numeric identifiers
# and the rest are numeric - confirm against the CSV.
num_cols = list(df.columns[3:])

# Define tabs on page
tab_str = [
    "Descriptive Stats",
    "Regression Model",
]
tab1, tab2 = st.tabs(tab_str)
# "Descriptive Stats" tab: raw data, summary table, distribution plot and
# correlation heat map, each inside its own expander.
# NOTE(review): the original indentation was lost; the nesting below is the
# most plausible reconstruction - confirm against the deployed layout.
with tab1:
    st.markdown("##### Descriptive Stats for NUTS 2 Regional Data")

    # --- Raw data table plus CSV download --------------------------------
    with st.expander("View NUTS 2 Regional Data"):
        st.dataframe(df.style.format(precision=2))
        st.write("This custom dataset was obtained using the EuroStat API.")
        # Serialise with two decimals, matching the on-screen formatting.
        csv_bytes = df.to_csv(index=False, float_format="%.2f").encode("utf-8")
        st.download_button(
            label="Download Dataset (CSV)",
            data=csv_bytes,
            file_name="nuts2_dataset.csv",
            mime="text/csv",
            key="download-csv",
        )

    # --- Summary statistics ----------------------------------------------
    with st.expander("Summary Stats for NUTS 2 Regional Data"):
        df_desc = describe(df, percentiles=[25, 75])
        st.dataframe(df_desc.style.format(precision=2))

    # --- Distribution plot for one selected variable ---------------------
    with st.expander("Distribution Plot for NUTS 2 Regional Data", expanded=True):
        # The container is created before the selectors so that the plot,
        # written into it afterwards, is placed above them.
        desc_cont = st.container()
        col1, col2 = st.columns(2)

        plot_options = ["Kernel Distribution Estimation Plot", "Box Plot"]
        variable = col1.selectbox("Choose Variable: ", options=num_cols, index=1)
        plot_type = col2.selectbox("Choose Plot: ", options=plot_options)

        with desc_cont:
            # `kde_plt` / `box_plt` are not defined in this file (presumably
            # from `stat_mod`); the first result is rendered as a Matplotlib
            # figure and the second as a Plotly figure.
            if plot_type == "Box Plot":
                fig1 = box_plt(df, variable)
                st.plotly_chart(fig1, use_container_width=True)
            elif plot_type == "Kernel Distribution Estimation Plot":
                fig1 = kde_plt(df, variable)
                st.pyplot(fig1)

    # --- Correlation heat map --------------------------------------------
    with st.expander("Correlation Heat Map for NUTS 2 Regional Data"):
        st.pyplot(corr_heatmap(df))
# "Regression Model" tab: pick X and Y variables and a trendline model, then
# draw the fitted scatter plot and, optionally, the model summary.
# NOTE(review): the original indentation was lost; the nesting below
# (including the summary living inside the output container) is the most
# plausible reconstruction - confirm against the deployed layout.
with tab2:
    st.markdown("##### Regression Modelling for NUTS 2 Regional Data")

    # The container is created before the controls so that the chart,
    # written into it afterwards, is placed above them.
    lin_reg_cont = st.container()
    col3, col4 = st.columns(2)
    col5, col6 = st.columns(2)

    iv = col3.selectbox("Choose X: ", options=num_cols, index=1)
    dv = col4.selectbox("Choose Y: ", options=num_cols, index=3)

    # Maps the label shown to the user onto the value passed to `lin_reg_plt`.
    model_dict = {"Linear Regression": "ols", "LOWESS": "lowess"}
    model = col5.radio("Choose Model: ", options=list(model_dict), horizontal=True)

    # Always bind `show_res` so the check below never depends on whether the
    # checkbox branch ran; the summary toggle is only offered for the
    # "Linear Regression" choice.
    show_res = False
    if model == "Linear Regression":
        show_res = col6.checkbox("View Model Summary", value=False)

    with lin_reg_cont:
        # `lin_reg_plt` is not defined in this file (presumably from
        # `stat_mod`); its result is rendered as a Plotly figure.
        fig2 = lin_reg_plt(df, iv, dv, model_dict[model])
        st.plotly_chart(fig2, use_container_width=True)
        if show_res:
            # Take the first fitted trendline result attached to the figure
            # and display its summary.
            summary = px.get_trendline_results(fig2).px_fit_results.iloc[0].summary()
            st.write(summary)