# Page header captured with this source: "Spaces" -- status "Runtime error"
# (the status line appeared twice).
from io import StringIO

import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
import numpy as np
import openpyxl
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import streamlit as st
from streamlit.errors import StreamlitAPIException
# Korean font setup
def set_font():
    """Register ``Pretendard-Bold.ttf`` with matplotlib and make it the default font.

    The path is relative, so the file is looked up from the current working
    directory. When the file is missing or cannot be parsed, matplotlib's
    default font is kept instead of raising, so a missing asset does not stop
    the module from loading.

    Only matplotlib's ``rcParams`` are changed here.
    """
    font_path = "Pretendard-Bold.ttf"
    try:
        fm.fontManager.addfont(font_path)
        # Read the family name stored inside the font file instead of guessing
        # it from the file name; the two are not required to match.
        family = fm.FontProperties(fname=font_path).get_name()
    except (OSError, RuntimeError):
        # OSError: file not found / unreadable. RuntimeError: not a valid font.
        family = None

    if family:
        plt.rcParams['font.family'] = family
    # Draw minus signs with the ASCII hyphen so they do not show up as a
    # missing glyph in fonts that lack U+2212.
    plt.rcParams['axes.unicode_minus'] = False
# Silence the legacy st.pyplot() global-use deprecation warning. Newer Streamlit
# releases removed this option and reject unknown option names, so tolerate
# its absence instead of failing at import time.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except StreamlitAPIException:
    pass

# Apply the font configuration once when the script is loaded.
set_font()
def load_data(file):
    """Read an uploaded CSV or Excel file into a DataFrame.

    Args:
        file: File-like object with a ``name`` attribute; the text after the
            last ``.`` in the name (case-insensitive) selects the parser.

    Returns:
        The parsed ``DataFrame``, or ``None`` after showing an error message
        when the extension is not ``csv``, ``xls`` or ``xlsx``.
    """
    readers = {
        'csv': pd.read_csv,
        'xls': pd.read_excel,
        'xlsx': pd.read_excel,
    }
    *_, suffix = file.name.split('.')
    reader = readers.get(suffix.lower())
    if reader is None:
        st.error("μ§μλμ§ μλ νμΌ νμμ λλ€. CSV, XLS, λλ XLSX νμΌμ μ λ‘λν΄μ£ΌμΈμ.")
        return None
    return reader(file)
def manual_data_entry():
    """Let the user define columns and type a table in by hand.

    Column names are read from a comma-separated text input; surrounding
    whitespace is stripped and empty names are ignored.

    Returns:
        The frame returned by the data editor, or ``None`` while no column
        name has been entered.
    """
    st.subheader("μλ λ°μ΄ν° μ λ ₯")
    raw_names = st.text_input("μ΄ μ΄λ¦μ μΌνλ‘ κ΅¬λΆνμ¬ μ λ ₯νμΈμ:")
    columns = list(filter(None, map(str.strip, raw_names.split(','))))
    if not columns:
        return None

    row_count = st.number_input("μ΄κΈ° νμ μλ₯Ό μ λ ₯νμΈμ:", min_value=1, value=5)
    # Start from an empty grid with the requested number of rows.
    blank_table = pd.DataFrame(columns=columns, index=range(row_count))
    return st.data_editor(blank_table, num_rows="dynamic")
def _compute_fill_value(series, strategy):
    """Return the value used to fill missing entries of *series*, or None.

    *strategy* is ``"mean"``, ``"median"`` or ``"mode"``. ``None`` means no
    usable value exists (for example, every entry is missing).
    """
    if strategy == "mode":
        modes = series.mode()
        # mode() is empty when the column holds nothing but missing values.
        return None if modes.empty else modes.iloc[0]

    # Coerce first: numeric text still yields a statistic, and non-numeric
    # text becomes NaN instead of making mean()/median() raise TypeError.
    numeric = pd.to_numeric(series, errors="coerce")
    value = numeric.mean() if strategy == "mean" else numeric.median()
    return None if pd.isna(value) else value


def preprocess_data(data):
    """Interactively handle missing values, then convert text columns to numbers.

    For every column that still contains missing values a select box offers
    four choices: drop the affected rows, or fill with the mean, median or
    most frequent value. If no fill value can be computed for the chosen
    method, the column is left unchanged. Afterwards each ``object`` column
    is converted with ``pd.to_numeric`` when possible; otherwise it is kept
    as is.

    Args:
        data: Input ``DataFrame``. It is not modified; a copy is processed.

    Returns:
        The processed ``DataFrame``.
    """
    st.subheader("λ°μ΄ν° μ μ²λ¦¬")

    # Work on a copy so the caller's frame is never mutated.
    data = data.copy()

    # Missing-value handling
    if data.isnull().values.any():
        st.write("κ²°μΈ‘μΉ μ²λ¦¬:")
        methods = ["μ κ±°", "νκ· μΌλ‘ λ체", "μ€μκ°μΌλ‘ λ체", "μ΅λΉκ°μΌλ‘ λ체"]
        drop_rows = methods[0]
        strategies = dict(zip(methods[1:], ("mean", "median", "mode")))

        for column in data.columns:
            # Re-check on the current frame: dropping rows for an earlier
            # column may already have removed this column's missing values.
            if not data[column].isnull().any():
                continue

            method = st.selectbox(f"{column} μ΄μ μ²λ¦¬ λ°©λ² μ ν:", methods)
            if method == drop_rows:
                data = data.dropna(subset=[column])
                continue

            fill_value = _compute_fill_value(data[column], strategies[method])
            if fill_value is not None:
                # Assign the result back instead of a chained inplace fillna,
                # which is deprecated and a no-op under pandas Copy-on-Write.
                data[column] = data[column].fillna(fill_value)

    # Data type conversion
    for column in data.columns:
        if data[column].dtype != 'object':
            continue
        try:
            converted = pd.to_numeric(data[column])
        except (ValueError, TypeError):
            st.write(f"{column} μ΄μ λ²μ£ΌνμΌλ‘ μ μ§λ©λλ€.")
        else:
            data[column] = converted
            st.write(f"{column} μ΄μ μ«μνμΌλ‘ λ³ννμ΅λλ€.")
    return data
def perform_analysis(data):
    """Render summary statistics and a fixed sequence of Plotly charts.

    Output order: ``describe()`` table, correlation heatmap, scatter matrix,
    one histogram per numeric column, one box plot per numeric column, and
    finally one bar chart of value counts per ``object`` column. "Numeric"
    here means ``float64`` or ``int64`` columns only.

    Args:
        data: The ``DataFrame`` to analyse.
    """
    st.header("νμμ λ°μ΄ν° λΆμ")

    # Summary statistics
    st.write("μμ½ ν΅κ³:")
    st.write(data.describe())

    # Correlation heatmap
    st.write("μκ΄κ΄κ³ ννΈλ§΅:")
    numeric_data = data.select_dtypes(include=['float64', 'int64'])
    has_numeric = not numeric_data.empty
    if has_numeric:
        heatmap = px.imshow(
            numeric_data.corr(),
            color_continuous_scale='RdBu_r',
            zmin=-1,
            zmax=1,
        )
        heatmap.update_layout(title="μκ΄κ΄κ³ ννΈλ§΅")
        st.plotly_chart(heatmap)
    else:
        st.write("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")

    # Scatter matrix
    st.write("μ°μ λ νλ ¬:")
    if has_numeric:
        scatter = px.scatter_matrix(numeric_data)
        scatter.update_layout(title="μ°μ λ νλ ¬")
        st.plotly_chart(scatter)
    else:
        st.write("μ°μ λ νλ ¬μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")

    numeric_names = list(numeric_data.columns)

    # Histograms (with a marginal box plot)
    st.write("νμ€ν κ·Έλ¨:")
    for name in numeric_names:
        histogram = px.histogram(data, x=name, marginal='box')
        histogram.update_layout(title=f"{name} νμ€ν κ·Έλ¨")
        st.plotly_chart(histogram)

    # Box plots
    st.write("λ°μ€νλ‘―:")
    for name in numeric_names:
        box = px.box(data, y=name)
        box.update_layout(title=f"{name} λ°μ€νλ‘―")
        st.plotly_chart(box)

    # Bar charts for categorical variables
    categorical_names = data.select_dtypes(include=['object']).columns
    if len(categorical_names) == 0:
        return

    st.write("λ²μ£Όν λ³μ λ§λ κ·Έλν:")
    for name in categorical_names:
        counts = data[name].value_counts().reset_index()
        counts = counts.set_axis(['category', 'count'], axis=1)
        bars = px.bar(counts, x='category', y='count', title=f"{name} λΆν¬")
        bars.update_layout(xaxis_title=name, yaxis_title="κ°μ")
        st.plotly_chart(bars)
def main():
    """Run the app: collect data, let the user edit it, then analyse it.

    Data comes either from an uploaded CSV/Excel file or from manual entry.
    Once data is available it is shown in an editable preview, and pressing
    the start button runs preprocessing followed by the analysis.
    """
    st.title("μΈν°λν°λΈ EDA ν΄ν·")

    input_methods = ("νμΌ μ λ‘λ", "μλ μ λ ₯")
    data_input_method = st.radio("λ°μ΄ν° μ λ ₯ λ°©λ² μ ν:", input_methods)
    if data_input_method == input_methods[0]:
        uploaded_file = st.file_uploader(
            "CSV, XLS, λλ XLSX νμΌμ μ ννμΈμ",
            type=["csv", "xls", "xlsx"],
        )
        data = load_data(uploaded_file) if uploaded_file is not None else None
    else:
        data = manual_data_entry()

    if data is None:
        # Nothing to analyse yet; forget any earlier click on the start button.
        st.session_state["analysis_started"] = False
        return

    st.subheader("λ°μ΄ν° 미리보기 λ° μμ ")
    st.write("λ°μ΄ν°λ₯Ό νμΈνκ³ νμν κ²½μ° μμ νμΈμ:")
    # An explicit key keeps this editor distinct from the one shown during
    # manual entry, which can receive identical data and parameters.
    edited_data = st.data_editor(data, num_rows="dynamic", key="preview_editor")

    # st.button() is True only for the rerun caused by the click itself.
    # preprocess_data() renders select boxes, and changing one triggers a new
    # rerun in which the button is False again -- so remember the click in
    # session state to keep the analysis on screen.
    if st.button("λ°μ΄ν° λΆμ μμ"):
        st.session_state["analysis_started"] = True

    if st.session_state.get("analysis_started", False):
        processed_data = preprocess_data(edited_data)
        perform_analysis(processed_data)


if __name__ == "__main__":
    main()