Spaces:

JUNGU
/

PPPDC_example

Build error

App Files Files Community

PPPDC_example / app.py

JUNGU

Update app.py

cc89531 verified 12 months ago

raw

history blame

5.68 kB

	import warnings
	from io import StringIO

	import matplotlib.font_manager as fm
	import matplotlib.pyplot as plt
	import numpy as np
	import openpyxl
	import pandas as pd
	import plotly.express as px
	import plotly.graph_objects as go
	import streamlit as st
	from st_aggrid import AgGrid, GridUpdateMode
	from st_aggrid.grid_options_builder import GridOptionsBuilder

	# Korean font setup for matplotlib.
	# NOTE: this only configures matplotlib; the Plotly figures drawn by
	# perform_analysis() do not read matplotlib's rc settings.
	font_path = "./Pretendard-Bold.ttf"  # change to the actual font file path
	try:
	    fm.fontManager.addfont(font_path)
	except OSError:
	    # A missing or unreadable font file must not take the whole app down;
	    # keep matplotlib's default font and say so.
	    warnings.warn(f"Font file {font_path!r} could not be loaded; using the default font.")
	else:
	    # Ask matplotlib for the family name it registered for this file instead
	    # of hard-coding one that may not match the font's internal name.
	    plt.rc('font', family=fm.FontProperties(fname=font_path).get_name())

	def load_data(file):
	    """Read an uploaded CSV or Excel file into a DataFrame.

	    The format is chosen from the extension of ``file.name`` (case-insensitive).
	    CSV files are first read with pandas' default encoding; if that fails with
	    a ``UnicodeDecodeError`` the handle is rewound and read again as CP949.

	    Args:
	        file: A file-like object with a ``name`` attribute and a ``seek()``
	            method, such as the value returned by ``st.file_uploader``.

	    Returns:
	        The parsed ``pandas.DataFrame``, or ``None`` when the extension is not
	        ``csv``, ``xls`` or ``xlsx`` (an error message is shown in that case).
	    """
	    extension = file.name.rsplit('.', 1)[-1].lower()

	    if extension == 'csv':
	        try:
	            return pd.read_csv(file)
	        except UnicodeDecodeError:
	            # CSV exports from Korean-locale spreadsheet tools are commonly
	            # CP949 rather than UTF-8; rewind and retry before giving up.
	            file.seek(0)
	            return pd.read_csv(file, encoding='cp949')

	    if extension in ('xls', 'xlsx'):
	        return pd.read_excel(file)

	    st.error("지원되지 않는 파일 형식입니다. CSV, XLS, 또는 XLSX 파일을 업로드해주세요.")
	    return None

	def manual_data_entry():
	    """Let the user type a table by hand in an editable grid.

	    The user supplies comma-separated column names and a row count; an empty
	    frame of that shape is shown in an AgGrid widget with every column
	    editable.

	    Returns:
	        The ``'data'`` entry of the AgGrid result, or ``None`` while no
	        non-blank column name has been entered.
	    """
	    st.subheader("수동 데이터 입력")

	    raw_names = st.text_input("열 이름을 쉼표로 구분하여 입력하세요:")
	    stripped_names = (part.strip() for part in raw_names.split(','))
	    columns = [name for name in stripped_names if name]

	    # Nothing to build until at least one usable column name exists.
	    if not columns:
	        return None

	    row_count = st.number_input("행의 수를 입력하세요:", min_value=1, value=5)
	    blank_frame = pd.DataFrame(columns=columns, index=range(row_count))

	    builder = GridOptionsBuilder.from_dataframe(blank_frame)
	    builder.configure_default_column(editable=True)

	    grid_result = AgGrid(
	        blank_frame,
	        gridOptions=builder.build(),
	        update_mode=GridUpdateMode.VALUE_CHANGED,
	        height=400,
	    )
	    return grid_result['data']

	def preprocess_data(data):
	    """Interactively handle missing values, then coerce text columns to numbers.

	    For every column that still contains missing values, a select box asks how
	    to treat them: drop the affected rows, or fill with the mean, median or
	    mode. Mean and median are only offered for numeric columns. Afterwards
	    each ``object`` column is passed through ``pd.to_numeric`` and is left
	    unchanged when that conversion fails.

	    Args:
	        data: The ``pandas.DataFrame`` to clean. It is copied first, so the
	            caller's frame is not modified.

	    Returns:
	        The cleaned ``pandas.DataFrame``.
	    """
	    st.subheader("데이터 전처리")

	    # Work on a copy so the column assignments below never leak into the
	    # frame the caller passed in.
	    data = data.copy()

	    # Missing-value handling
	    if data.isnull().values.any():
	        st.write("결측치 처리:")
	        for column in data.columns:
	            # Re-checked on every pass: dropping rows for an earlier column
	            # may already have removed this column's missing values.
	            if not data[column].isnull().any():
	                continue

	            if pd.api.types.is_numeric_dtype(data[column]):
	                options = ["제거", "평균으로 대체", "중앙값으로 대체", "최빈값으로 대체"]
	            else:
	                # mean()/median() raise TypeError on text columns, so only
	                # offer the choices that are valid for them.
	                options = ["제거", "최빈값으로 대체"]
	            method = st.selectbox(f"{column} 열의 처리 방법 선택:", options)

	            # Assign the filled column back explicitly; calling
	            # fillna(inplace=True) on data[column] is chained assignment and
	            # is not guaranteed to update the frame.
	            if method == "제거":
	                data = data.dropna(subset=[column])
	            elif method == "평균으로 대체":
	                data[column] = data[column].fillna(data[column].mean())
	            elif method == "중앙값으로 대체":
	                data[column] = data[column].fillna(data[column].median())
	            elif method == "최빈값으로 대체":
	                modes = data[column].mode()
	                # mode() is empty when every value is missing; there is
	                # nothing to fill with in that case.
	                if not modes.empty:
	                    data[column] = data[column].fillna(modes.iloc[0])

	    # Data type conversion
	    for column in data.columns:
	        if data[column].dtype == 'object':
	            try:
	                converted = pd.to_numeric(data[column])
	            except (ValueError, TypeError):
	                st.write(f"{column} 열은 범주형으로 유지됩니다.")
	            else:
	                data[column] = converted
	                st.write(f"{column} 열을 숫자형으로 변환했습니다.")

	    return data

	def perform_analysis(data):
	    """Render the exploratory-analysis section for a DataFrame.

	    Writes, in order: summary statistics, a correlation heatmap and a scatter
	    matrix of the numeric columns, one histogram and one box plot per numeric
	    column, and one value-count bar chart per ``object`` column.

	    Args:
	        data: The ``pandas.DataFrame`` to analyse.

	    Returns:
	        None. All output goes to the Streamlit page.
	    """
	    st.header("탐색적 데이터 분석")

	    # Summary statistics
	    st.write("요약 통계:")
	    st.write(data.describe())

	    # 'number' selects every numeric width, not only float64/int64.
	    numeric_data = data.select_dtypes(include='number')
	    has_numeric = not numeric_data.empty

	    # Correlation heatmap
	    st.write("상관관계 히트맵:")
	    if has_numeric:
	        fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
	        fig.update_layout(title='상관관계 히트맵')
	        st.plotly_chart(fig)
	    else:
	        st.write("상관관계 히트맵을 그릴 수 있는 숫자형 열이 없습니다.")

	    # Scatter matrix
	    st.write("산점도 행렬:")
	    if has_numeric:
	        fig = px.scatter_matrix(numeric_data)
	        fig.update_layout(title='산점도 행렬')
	        st.plotly_chart(fig)
	    else:
	        st.write("산점도 행렬을 그릴 수 있는 숫자형 열이 없습니다.")

	    # Histograms
	    st.write("히스토그램:")
	    for column in numeric_data.columns:
	        fig = px.histogram(data, x=column, marginal='box')
	        fig.update_layout(title=f'{column} 히스토그램')
	        st.plotly_chart(fig)

	    # Box plots
	    st.write("박스플롯:")
	    for column in numeric_data.columns:
	        fig = px.box(data, y=column)
	        fig.update_layout(title=f'{column} 박스플롯')
	        st.plotly_chart(fig)

	    # Bar charts for categorical variables
	    categorical_columns = data.select_dtypes(include=['object']).columns
	    if not categorical_columns.empty:
	        st.write("범주형 변수 막대 그래프:")
	        for column in categorical_columns:
	            counts = data[column].value_counts()
	            # Pass the categories and counts directly: the column names that
	            # value_counts().reset_index() produces differ between pandas
	            # versions, so referring to them by name is fragile.
	            fig = px.bar(x=counts.index, y=counts.values)
	            fig.update_layout(title=f'{column} 분포', xaxis_title=column, yaxis_title='개수')
	            st.plotly_chart(fig)

	def main():
	    """Run the app: obtain a dataset, preview it, clean it and analyse it.

	    The dataset comes either from an uploaded CSV/Excel file or from the
	    manual-entry grid, depending on the radio selection. Nothing further is
	    rendered until a dataset is available.
	    """
	    st.title("인터랙티브 EDA 툴킷")

	    input_choice = st.radio("데이터 입력 방법 선택:", ("파일 업로드", "수동 입력"))

	    data = None
	    if input_choice != "파일 업로드":
	        data = manual_data_entry()
	    else:
	        uploaded_file = st.file_uploader(
	            "CSV, XLS, 또는 XLSX 파일을 선택하세요",
	            type=["csv", "xls", "xlsx"],
	        )
	        if uploaded_file is not None:
	            data = load_data(uploaded_file)

	    # No upload yet, unsupported file, or no column names entered.
	    if data is None:
	        return

	    st.write("데이터 미리보기:")
	    st.write(data.head())

	    cleaned = preprocess_data(data)
	    perform_analysis(cleaned)

	# Script entry point: build the page only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()