# Spaces: Runtime error
def initialize_session_state():
    """Seed ``st.session_state`` with the keys the app reads later.

    Each key is written only when it is absent, so values already stored
    in the session are left untouched when the script runs again.
    """
    # The dict literal is rebuilt on every call, so the empty ``slicers``
    # dict is a fresh object and never shared between sessions.
    initial_values = {
        'data': None,
        'processed_data': None,
        'slicers': {},
        'x_var': None,
        'y_var': None,
        'analysis_performed': False,
    }
    for key, initial in initial_values.items():
        if key not in st.session_state:
            setattr(st.session_state, key, initial)
def create_slicers(data):
    """Render one multiselect filter per low-cardinality categorical column.

    Columns of dtype ``object`` or ``category`` with at most 10 distinct
    non-null values get a ``st.multiselect`` widget. The user's selection
    is stored in ``st.session_state.slicers[col]``; the first time a column
    is seen, every available value is selected.

    Args:
        data: DataFrame whose categorical columns are offered as filters.
    """
    categorical_columns = data.select_dtypes(include=['object', 'category']).columns
    for col in categorical_columns:
        # Only create a slicer when the column has 10 or fewer unique values.
        if data[col].nunique() > 10:
            continue

        # nunique() ignores NaN but unique() returns it; sorting NaN together
        # with strings raises TypeError, so missing values are dropped first.
        options = sorted(data[col].dropna().unique())

        # Start from the remembered selection (or everything on first sight),
        # keeping only values that are still valid options: st.multiselect
        # rejects defaults that are not present in ``options``.
        remembered = st.session_state.slicers.get(col, options)
        default = [value for value in remembered if value in options]

        st.session_state.slicers[col] = st.multiselect(
            f"{col} μ ν",
            options=options,
            default=default,
        )
def apply_slicers(data):
    """Return ``data`` restricted to the rows allowed by every active slicer.

    Iterates over ``st.session_state.slicers`` (column name -> selected
    values). A slicer with an empty selection is skipped, i.e. it does not
    filter anything.

    Args:
        data: DataFrame to filter.

    Returns:
        The filtered DataFrame (the input itself when no slicer is active).
    """
    filtered = data
    for column, chosen in st.session_state.slicers.items():
        if not chosen:
            # Nothing selected for this column: leave the rows as they are.
            continue
        keep_rows = filtered[column].isin(chosen)
        filtered = filtered[keep_rows]
    return filtered
def _plot_regression(filtered_data, x_var, y_var):
    """Draw the x/y scatter plot and, when it can be fitted, the regression line."""
    fig = px.scatter(
        filtered_data,
        x=x_var,
        y=y_var,
        color='λ°' if 'λ°' in filtered_data.columns else None,
    )

    # linregress propagates NaN into every statistic and raises ValueError on
    # empty input or when all x values are identical, so fit only on complete
    # pairs and only when a line is actually defined.
    pairs = filtered_data[[x_var, y_var]].dropna()
    x = pairs[x_var]
    y = pairs[y_var]
    if len(pairs) < 2 or x.nunique() < 2:
        st.plotly_chart(fig)
        st.warning(
            "Not enough data points to fit a regression line for the current selection."
        )
        return

    # Add the regression line.
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    line_x = np.array([x.min(), x.max()])
    line_y = slope * line_x + intercept
    fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νκ·μ '))

    r_squared = r_value ** 2
    fig.update_layout(
        title=f'{x_var}μ {y_var}μ κ΄κ³ (R-squared: {r_squared:.4f})',
        xaxis_title=x_var,
        yaxis_title=y_var,
        annotations=[
            dict(
                x=0.5,
                y=1.05,
                xref='paper',
                yref='paper',
                text=f'R-squared: {r_squared:.4f}',
                showarrow=False,
            )
        ],
    )
    st.plotly_chart(fig)

    # Additional statistics.
    st.write(f"μκ΄κ³μ: {r_value:.4f}")
    st.write(f"p-value: {p_value:.4f}")
    st.write(f"νμ€ μ€μ°¨: {std_err:.4f}")


def perform_analysis(data):
    """Render the interactive analysis page for ``data``.

    Steps, in display order:
      1. Build the slicer widgets and filter ``data`` with them.
      2. Show ``describe()`` summary statistics of the filtered data.
      3. Show a correlation heatmap of the ``float64``/``int64`` columns.
      4. Let the user pick an X and a Y numeric column and show a scatter
         plot with a fitted regression line and its statistics.

    The selected columns are stored in ``st.session_state.x_var`` /
    ``st.session_state.y_var`` and ``st.session_state.analysis_performed``
    is set to True at the end.

    NOTE(review): the original indentation was lost; the flag is assumed to
    be set unconditionally at the end of the function - confirm.

    Args:
        data: DataFrame to analyse.
    """
    st.header("νμμ λ°μ΄ν° λΆμ")

    # Create and apply the slicers.
    create_slicers(data)
    filtered_data = apply_slicers(data)

    # Summary statistics.
    st.write("μμ½ ν΅κ³:")
    st.write(filtered_data.describe())

    # Correlation heatmap.
    st.write("μκ΄κ΄κ³ ννΈλ§΅:")
    numeric_data = filtered_data.select_dtypes(include=['float64', 'int64'])
    if not numeric_data.empty:
        fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
        fig.update_layout(title='μκ΄κ΄κ³ ννΈλ§΅')
        st.plotly_chart(fig)
    else:
        st.write("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")

    # Scatter plot and regression for the two variables chosen by the user.
    st.subheader("λ λ³μ κ°μ κ΄κ³ λΆμ")
    numeric_columns = filtered_data.select_dtypes(include=['float64', 'int64']).columns

    # Pre-select the previously chosen column when it is still available.
    x_index = (
        numeric_columns.get_loc(st.session_state.x_var)
        if st.session_state.x_var in numeric_columns
        else 0
    )
    st.session_state.x_var = st.selectbox(
        "XμΆ λ³μ μ ν",
        options=numeric_columns,
        key='x_var_select',
        index=x_index,
    )

    # The Y choices exclude whatever was picked for X.
    y_options = [col for col in numeric_columns if col != st.session_state.x_var]
    y_index = (
        y_options.index(st.session_state.y_var)
        if st.session_state.y_var in y_options
        else 0
    )
    st.session_state.y_var = st.selectbox(
        "YμΆ λ³μ μ ν",
        options=y_options,
        key='y_var_select',
        index=y_index,
    )

    # Compare against None rather than truthiness so that a valid but falsy
    # column name (e.g. 0 or "") is not silently skipped.
    if st.session_state.x_var is not None and st.session_state.y_var is not None:
        _plot_regression(filtered_data, st.session_state.x_var, st.session_state.y_var)

    st.session_state.analysis_performed = True
def main():
    """Entry point of the app: load data, let the user edit it, run the analysis.

    While no dataset is stored in the session, the user chooses between
    uploading a file and manual entry. Once a dataset exists it is shown in
    an editable table; pressing the start button preprocesses the edited
    table once, and the analysis is re-rendered on every later run.
    """
    st.title("μΈν°λν°λΈ EDA ν΄ν·")
    initialize_session_state()
    state = st.session_state

    if state.data is None:
        data_input_method = st.radio("λ°μ΄ν° μ λ ₯ λ°©λ² μ ν:", ("νμΌ μ λ‘λ", "μλ μ λ ₯"))
        if data_input_method != "νμΌ μ λ‘λ":
            state.data = manual_data_entry()
        else:
            uploaded_file = st.file_uploader("CSV, XLS, λλ XLSX νμΌμ μ ννμΈμ", type=["csv", "xls", "xlsx"])
            if uploaded_file is not None:
                state.data = load_data(uploaded_file)

    # Nothing to preview or analyse until a dataset has been provided.
    if state.data is None:
        return

    st.subheader("λ°μ΄ν° 미리보기 λ° μμ ")
    st.write("λ°μ΄ν°λ₯Ό νμΈνκ³ νμν κ²½μ° μμ νμΈμ:")
    edited_data = st.data_editor(state.data, num_rows="dynamic")

    # The button is always rendered before the flag is consulted.
    start_clicked = st.button("λ°μ΄ν° λΆμ μμ")
    if not (start_clicked or state.analysis_performed):
        return

    # Preprocess only on the first run of the analysis; later reruns reuse
    # the stored result.
    if not state.analysis_performed:
        state.processed_data = preprocess_data(edited_data)
    perform_analysis(state.processed_data)
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()