JUNGU committed on
Commit
5d3671b
·
verified ·
1 Parent(s): 6607e79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -31
app.py CHANGED
@@ -17,8 +17,8 @@ def set_font():
17
  # 폰트 설정을 가져옵니다
18
  font_settings = set_font()
19
 
20
- # 세션 상태 초기화
21
- def init_session_state():
22
  if 'data' not in st.session_state:
23
  st.session_state.data = None
24
  if 'processed_data' not in st.session_state:
@@ -33,6 +33,8 @@ def init_session_state():
33
  st.session_state.y_var = None
34
  if 'slicers' not in st.session_state:
35
  st.session_state.slicers = {}
 
 
36
 
37
  # 데이터 로드
38
  @st.cache_data
@@ -48,29 +50,27 @@ def load_data(file):
48
  return data
49
 
50
  def manual_data_entry():
51
- st.subheader("μˆ˜λ™ 데이터 μž…λ ₯")
52
- col_names = st.text_input("μ—΄ 이름을 μ‰Όν‘œλ‘œ κ΅¬λΆ„ν•˜μ—¬ μž…λ ₯ν•˜μ„Έμš”:").split(',')
53
  col_names = [name.strip() for name in col_names if name.strip()]
54
 
55
  if col_names:
56
- num_rows = st.number_input("초기 ν–‰μ˜ 수λ₯Ό μž…λ ₯ν•˜μ„Έμš”:", min_value=1, value=5)
57
  data = pd.DataFrame(columns=col_names, index=range(num_rows))
58
 
59
- edited_data = st.data_editor(data, num_rows="dynamic")
60
 
61
  return edited_data
62
  return None
63
 
64
  def preprocess_data(data):
65
- st.subheader("데이터 μ „μ²˜λ¦¬")
66
-
67
  # 결측치 처리
68
  if data.isnull().sum().sum() > 0:
69
  st.write("결츑치 처리:")
70
  for column in data.columns:
71
  if data[column].isnull().sum() > 0:
72
  method = st.selectbox(f"{column} μ—΄μ˜ 처리 방법 선택:",
73
- ["제거", "ν‰κ· μœΌλ‘œ λŒ€μ²΄", "μ€‘μ•™κ°’μœΌλ‘œ λŒ€μ²΄", "μ΅œλΉˆκ°’μœΌλ‘œ λŒ€μ²΄"])
 
74
  if method == "제거":
75
  data = data.dropna(subset=[column])
76
  elif method == "ν‰κ· μœΌλ‘œ λŒ€μ²΄":
@@ -101,7 +101,8 @@ def create_slicers(data):
101
  st.session_state.slicers[col] = st.multiselect(
102
  f"{col} 선택",
103
  options=sorted(data[col].unique()),
104
- default=sorted(data[col].unique())
 
105
  )
106
 
107
  def apply_slicers(data):
@@ -150,12 +151,12 @@ def plot_scatter_with_regression(data, x_var, y_var):
150
  st.write(f"p-value: {p_value:.4f}")
151
  st.write(f"ν‘œμ€€ 였차: {std_err:.4f}")
152
 
153
- def perform_analysis(data):
154
  st.header("탐색적 데이터 뢄석")
155
 
156
  # 슬라이서 생성 및 적용
157
- create_slicers(data)
158
- filtered_data = apply_slicers(data)
159
 
160
  # 요약 통계
161
  st.write("μš”μ•½ 톡계:")
@@ -167,36 +168,38 @@ def perform_analysis(data):
167
 
168
  # 사용자가 선택한 두 변수에 대한 산점도 및 회귀 분석
169
  st.subheader("두 λ³€μˆ˜ κ°„μ˜ 관계 뢄석")
170
- x_var = st.selectbox("XμΆ• λ³€μˆ˜ 선택", options=st.session_state.numeric_columns, key='x_var')
171
- y_var = st.selectbox("YμΆ• λ³€μˆ˜ 선택", options=[col for col in st.session_state.numeric_columns if col != x_var], key='y_var')
 
 
 
172
 
173
- if x_var and y_var:
174
- plot_scatter_with_regression(filtered_data, x_var, y_var)
175
 
176
  def main():
177
  st.title("μΈν„°λž™ν‹°λΈŒ EDA νˆ΄ν‚·")
178
 
179
- init_session_state()
180
 
181
- data_input_method = st.radio("데이터 μž…λ ₯ 방법 선택:", ("파일 μ—…λ‘œλ“œ", "μˆ˜λ™ μž…λ ₯"))
182
-
183
- if data_input_method == "파일 μ—…λ‘œλ“œ":
184
- uploaded_file = st.file_uploader("CSV, XLS, λ˜λŠ” XLSX νŒŒμΌμ„ μ„ νƒν•˜μ„Έμš”", type=["csv", "xls", "xlsx"])
185
- if uploaded_file is not None:
186
- st.session_state.data = load_data(uploaded_file)
 
187
  else:
188
- st.session_state.data = None
189
- else:
190
- st.session_state.data = manual_data_entry()
191
 
192
  if st.session_state.data is not None:
193
  st.subheader("데이터 미리보기 및 μˆ˜μ •")
194
  st.write("데이터λ₯Ό ν™•μΈν•˜κ³  ν•„μš”ν•œ 경우 μˆ˜μ •ν•˜μ„Έμš”:")
195
- edited_data = st.data_editor(st.session_state.data, num_rows="dynamic")
196
 
197
- if st.button("데이터 뢄석 μ‹œμž‘"):
198
- st.session_state.processed_data = preprocess_data(edited_data)
199
- perform_analysis(st.session_state.processed_data)
 
200
 
201
  if __name__ == "__main__":
202
  main()
 
17
  # 폰트 설정을 가져옵니다
18
  font_settings = set_font()
19
 
20
+ # 세션 상태 초기화 및 관리
21
+ def manage_session_state():
22
  if 'data' not in st.session_state:
23
  st.session_state.data = None
24
  if 'processed_data' not in st.session_state:
 
33
  st.session_state.y_var = None
34
  if 'slicers' not in st.session_state:
35
  st.session_state.slicers = {}
36
+ if 'analysis_performed' not in st.session_state:
37
+ st.session_state.analysis_performed = False
38
 
39
  # 데이터 로드
40
  @st.cache_data
 
50
  return data
51
 
52
  def manual_data_entry():
53
+ col_names = st.text_input("μ—΄ 이름을 μ‰Όν‘œλ‘œ κ΅¬λΆ„ν•˜μ—¬ μž…λ ₯ν•˜μ„Έμš”:", key="manual_col_names").split(',')
 
54
  col_names = [name.strip() for name in col_names if name.strip()]
55
 
56
  if col_names:
57
+ num_rows = st.number_input("초기 ν–‰μ˜ 수λ₯Ό μž…λ ₯ν•˜μ„Έμš”:", min_value=1, value=5, key="manual_num_rows")
58
  data = pd.DataFrame(columns=col_names, index=range(num_rows))
59
 
60
+ edited_data = st.data_editor(data, num_rows="dynamic", key="manual_data_editor")
61
 
62
  return edited_data
63
  return None
64
 
65
  def preprocess_data(data):
 
 
66
  # 결측치 처리
67
  if data.isnull().sum().sum() > 0:
68
  st.write("결츑치 처리:")
69
  for column in data.columns:
70
  if data[column].isnull().sum() > 0:
71
  method = st.selectbox(f"{column} μ—΄μ˜ 처리 방법 선택:",
72
+ ["제거", "ν‰κ· μœΌλ‘œ λŒ€μ²΄", "μ€‘μ•™κ°’μœΌλ‘œ λŒ€μ²΄", "μ΅œλΉˆκ°’μœΌλ‘œ λŒ€μ²΄"],
73
+ key=f"missing_{column}")
74
  if method == "제거":
75
  data = data.dropna(subset=[column])
76
  elif method == "ν‰κ· μœΌλ‘œ λŒ€μ²΄":
 
101
  st.session_state.slicers[col] = st.multiselect(
102
  f"{col} 선택",
103
  options=sorted(data[col].unique()),
104
+ default=sorted(data[col].unique()),
105
+ key=f"slicer_{col}"
106
  )
107
 
108
  def apply_slicers(data):
 
151
  st.write(f"p-value: {p_value:.4f}")
152
  st.write(f"ν‘œμ€€ 였차: {std_err:.4f}")
153
 
154
+ def perform_analysis():
155
  st.header("탐색적 데이터 뢄석")
156
 
157
  # 슬라이서 생성 및 적용
158
+ create_slicers(st.session_state.processed_data)
159
+ filtered_data = apply_slicers(st.session_state.processed_data)
160
 
161
  # 요약 통계
162
  st.write("μš”μ•½ 톡계:")
 
168
 
169
  # 사용자가 선택한 두 변수에 대한 산점도 및 회귀 분석
170
  st.subheader("두 λ³€μˆ˜ κ°„μ˜ 관계 뢄석")
171
+ st.session_state.x_var = st.selectbox("XμΆ• λ³€μˆ˜ 선택", options=st.session_state.numeric_columns, key='x_var')
172
+ st.session_state.y_var = st.selectbox("YμΆ• λ³€μˆ˜ 선택", options=[col for col in st.session_state.numeric_columns if col != st.session_state.x_var], key='y_var')
173
+
174
+ if st.session_state.x_var and st.session_state.y_var:
175
+ plot_scatter_with_regression(filtered_data, st.session_state.x_var, st.session_state.y_var)
176
 
177
+ st.session_state.analysis_performed = True
 
178
 
179
  def main():
180
  st.title("μΈν„°λž™ν‹°λΈŒ EDA νˆ΄ν‚·")
181
 
182
+ manage_session_state()
183
 
184
+ if not st.session_state.data:
185
+ data_input_method = st.radio("데이터 μž…λ ₯ 방법 선택:", ("파일 μ—…λ‘œλ“œ", "μˆ˜λ™ μž…λ ₯"), key="data_input_method")
186
+
187
+ if data_input_method == "파일 μ—…λ‘œλ“œ":
188
+ uploaded_file = st.file_uploader("CSV, XLS, λ˜λŠ” XLSX νŒŒμΌμ„ μ„ νƒν•˜μ„Έμš”", type=["csv", "xls", "xlsx"], key="file_uploader")
189
+ if uploaded_file:
190
+ st.session_state.data = load_data(uploaded_file)
191
  else:
192
+ st.session_state.data = manual_data_entry()
 
 
193
 
194
  if st.session_state.data is not None:
195
  st.subheader("데이터 미리보기 및 μˆ˜μ •")
196
  st.write("데이터λ₯Ό ν™•μΈν•˜κ³  ν•„μš”ν•œ 경우 μˆ˜μ •ν•˜μ„Έμš”:")
197
+ edited_data = st.data_editor(st.session_state.data, num_rows="dynamic", key="data_editor")
198
 
199
+ if st.button("데이터 뢄석 μ‹œμž‘", key="start_analysis") or st.session_state.analysis_performed:
200
+ if not st.session_state.analysis_performed:
201
+ st.session_state.processed_data = preprocess_data(edited_data)
202
+ perform_analysis()
203
 
204
  if __name__ == "__main__":
205
  main()