PPPDC_example / app.py
JUNGU's picture
Update app.py
7abab37 verified
raw
history blame
5.42 kB
def initialize_session_state():
    """Seed ``st.session_state`` with every key the app reads later.

    Keys that already exist are left untouched, so values survive Streamlit
    reruns; only missing keys receive their initial value.
    """
    # Built on each call so the mutable ``slicers`` dict is always a fresh object.
    initial_values = {
        'data': None,
        'processed_data': None,
        'slicers': {},
        'x_var': None,
        'y_var': None,
        'analysis_performed': False,
    }
    for key, value in initial_values.items():
        if key not in st.session_state:
            setattr(st.session_state, key, value)
def create_slicers(data):
    """Render one multiselect filter ("slicer") per low-cardinality categorical column.

    For every ``object``/``category`` column of ``data`` with at most 10
    distinct non-missing values, a ``st.multiselect`` is drawn and the user's
    selection is stored in ``st.session_state.slicers[col]``.

    Args:
        data: DataFrame whose categorical columns should get slicers.
    """
    categorical_columns = data.select_dtypes(include=['object', 'category']).columns
    for col in categorical_columns:
        # Only create a slicer when the column has 10 or fewer unique values.
        if data[col].nunique() > 10:
            continue

        # Missing values are dropped before sorting: a NaN (float) mixed with
        # strings would make sorted() raise TypeError.
        values = data[col].dropna().unique()
        try:
            options = sorted(values)
        except TypeError:
            # Mixed, mutually unorderable types: fall back to string ordering.
            options = sorted(values, key=str)
        if not options:
            # Column holds only missing values; there is nothing to filter on.
            continue

        # Start from the previous selection (everything selected on first use)
        # and keep only values that are still valid options, because
        # st.multiselect rejects defaults that are not among its options.
        previous = st.session_state.slicers.get(col, options)
        default = [value for value in previous if value in options]

        st.session_state.slicers[col] = st.multiselect(
            f"{col} 선택",
            options=options,
            default=default
        )
def apply_slicers(data):
    """Return ``data`` restricted to the rows matching every active slicer.

    Each entry of ``st.session_state.slicers`` maps a column name to the
    values chosen for it. Columns with an empty selection are not filtered.

    Args:
        data: DataFrame to filter.

    Returns:
        The filtered DataFrame (``data`` itself when no slicer is active).
    """
    filtered = data
    for column, chosen in st.session_state.slicers.items():
        if not chosen:
            # Empty selection: leave this column unfiltered.
            continue
        keep_mask = filtered[column].isin(chosen)
        filtered = filtered[keep_mask]
    return filtered
def perform_analysis(data):
    """Render the exploratory analysis page for ``data``.

    Draws the slicers, then for the slicer-filtered data shows summary
    statistics, a correlation heatmap of the float64/int64 columns, and a
    scatter plot of two user-selected numeric columns with a least-squares
    regression line. Sets ``st.session_state.analysis_performed`` to True
    when rendering completes.

    Args:
        data: DataFrame to analyse.
    """
    st.header("탐색적 데이터 뢄석")

    # Create the slicers and apply them.
    create_slicers(data)
    filtered_data = apply_slicers(data)

    # Summary statistics.
    st.write("μš”μ•½ 톡계:")
    st.write(filtered_data.describe())

    # Correlation heatmap.
    st.write("상관관계 히트맡:")
    numeric_data = filtered_data.select_dtypes(include=['float64', 'int64'])
    if not numeric_data.empty:
        fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
        fig.update_layout(title='상관관계 히트맡')
        st.plotly_chart(fig)
    else:
        st.write("상관관계 νžˆνŠΈλ§΅μ„ 그릴 수 μžˆλŠ” μˆ«μžν˜• 열이 μ—†μŠ΅λ‹ˆλ‹€.")

    # Scatter plot and regression for two user-selected variables.
    st.subheader("두 λ³€μˆ˜ κ°„μ˜ 관계 뢄석")
    numeric_columns = numeric_data.columns
    st.session_state.x_var = st.selectbox(
        "XμΆ• λ³€μˆ˜ 선택",
        options=numeric_columns,
        key='x_var_select',
        # Re-select the previous choice when it is still a valid column.
        index=numeric_columns.get_loc(st.session_state.x_var) if st.session_state.x_var in numeric_columns else 0,
    )
    # The Y choices exclude whatever was picked for X.
    y_options = [col for col in numeric_columns if col != st.session_state.x_var]
    st.session_state.y_var = st.selectbox(
        "YμΆ• λ³€μˆ˜ 선택",
        options=y_options,
        key='y_var_select',
        index=y_options.index(st.session_state.y_var) if st.session_state.y_var in y_options else 0,
    )

    x_var = st.session_state.x_var
    y_var = st.session_state.y_var
    if x_var and y_var:
        fig = px.scatter(filtered_data, x=x_var, y=y_var, color='반' if '반' in filtered_data.columns else None)

        # Fit only on rows where both variables are present: missing values
        # would otherwise turn every regression statistic into NaN.
        pairs = filtered_data[[x_var, y_var]].dropna()
        x = pairs[x_var]
        y = pairs[y_var]
        # A line needs at least two points and some spread in X; linregress
        # raises on empty input or when all X values are identical.
        can_fit = len(pairs) >= 2 and x.nunique() > 1

        if can_fit:
            # Add the regression line.
            slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
            line_x = np.array([x.min(), x.max()])
            line_y = slope * line_x + intercept
            fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νšŒκ·€μ„ '))
            r_squared = r_value ** 2
            fig.update_layout(
                title=f'{x_var}와 {y_var}의 관계 (R-squared: {r_squared:.4f})',
                xaxis_title=x_var,
                yaxis_title=y_var,
                annotations=[
                    dict(
                        x=0.5,
                        y=1.05,
                        xref='paper',
                        yref='paper',
                        text=f'R-squared: {r_squared:.4f}',
                        showarrow=False,
                    )
                ]
            )
        else:
            # No fit possible: show the scatter alone, without R-squared.
            fig.update_layout(
                title=f'{x_var}와 {y_var}의 관계',
                xaxis_title=x_var,
                yaxis_title=y_var,
            )
        st.plotly_chart(fig)

        if can_fit:
            # Additional statistics.
            st.write(f"μƒκ΄€κ³„μˆ˜: {r_value:.4f}")
            st.write(f"p-value: {p_value:.4f}")
            st.write(f"ν‘œμ€€ 였차: {std_err:.4f}")
        else:
            st.warning(
                "Regression skipped: at least two complete rows with "
                "differing X values are required."
            )

    st.session_state.analysis_performed = True
def main():
    """Run the Streamlit page: load data, let the user edit it, then analyse it.

    While no data is stored in the session, the user picks between uploading
    a file and manual entry. Once data exists it is shown in an editable
    table; clicking the start button preprocesses the edited table and renders
    the analysis, which then stays visible on later reruns.
    """
    st.title("μΈν„°λž™ν‹°λΈŒ EDA νˆ΄ν‚·")
    initialize_session_state()

    if st.session_state.data is None:
        data_input_method = st.radio("데이터 μž…λ ₯ 방법 선택:", ("파일 μ—…λ‘œλ“œ", "μˆ˜λ™ μž…λ ₯"))
        if data_input_method == "파일 μ—…λ‘œλ“œ":
            uploaded_file = st.file_uploader("CSV, XLS, λ˜λŠ” XLSX νŒŒμΌμ„ μ„ νƒν•˜μ„Έμš”", type=["csv", "xls", "xlsx"])
            if uploaded_file is not None:
                st.session_state.data = load_data(uploaded_file)
        else:
            st.session_state.data = manual_data_entry()

    if st.session_state.data is not None:
        st.subheader("데이터 미리보기 및 μˆ˜μ •")
        st.write("데이터λ₯Ό ν™•μΈν•˜κ³  ν•„μš”ν•œ 경우 μˆ˜μ •ν•˜μ„Έμš”:")
        edited_data = st.data_editor(st.session_state.data, num_rows="dynamic")

        start_clicked = st.button("데이터 뢄석 μ‹œμž‘")
        if start_clicked:
            # Re-run preprocessing on every click so edits made after the
            # first analysis are not silently ignored.
            st.session_state.processed_data = preprocess_data(edited_data)
            # Stored slicer selections may refer to values that no longer
            # exist in the edited data, so start the filters afresh.
            st.session_state.slicers = {}

        # Keep the analysis on screen across reruns triggered by its widgets.
        if start_clicked or st.session_state.analysis_performed:
            perform_analysis(st.session_state.processed_data)


if __name__ == "__main__":
    main()