JUNGU committed on
Commit
9357bdd
·
verified ·
1 Parent(s): fb18f50

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -6
app.py CHANGED
@@ -8,6 +8,7 @@ import openpyxl
8
  import matplotlib.font_manager as fm
9
  from scipy import stats
10
  import os
 
11
 
12
  #μ‚¬μ΄μ¦ˆ 크게
13
  st.set_page_config(layout="wide")
@@ -177,6 +178,69 @@ def plot_correlation_heatmap(data):
177
  else:
178
  st.warning("상관관계 νžˆνŠΈλ§΅μ„ 그릴 수 μžˆλŠ” μˆ«μžν˜• 열이 μ—†μŠ΅λ‹ˆλ‹€.")
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def plot_scatter_with_regression(data, x_var, y_var):
181
  # νšŒκ·€ 뢄석 μˆ˜ν–‰
182
  x = data[x_var]
@@ -259,12 +323,10 @@ def perform_analysis():
259
  # 데이터가 변경될 λ•Œλ§ˆλ‹€ ν•„ν„°λ§λœ 데이터 μ—…λ°μ΄νŠΈ
260
  st.session_state.filtered_data = apply_slicers(st.session_state.processed_data)
261
 
262
- # 2μ—΄ λ ˆμ΄μ•„μ›ƒ 생성
263
- col1, col2 = st.columns(2)
264
-
265
 
266
  with col1:
267
-
268
  # μš”μ•½ 톡계
269
  st.write("μš”μ•½ 톡계:")
270
  st.write(st.session_state.filtered_data.describe())
@@ -274,7 +336,6 @@ def perform_analysis():
274
  plot_correlation_heatmap(st.session_state.filtered_data)
275
 
276
  with col2:
277
-
278
  # μ‚¬μš©μžκ°€ μ„ νƒν•œ 두 λ³€μˆ˜μ— λŒ€ν•œ 산점도 및 νšŒκ·€ 뢄석
279
  st.subheader("두 λ³€μˆ˜ κ°„μ˜ 관계 뢄석")
280
  x_var = st.selectbox("XμΆ• λ³€μˆ˜ 선택", options=st.session_state.numeric_columns, key='x_var')
@@ -283,11 +344,41 @@ def perform_analysis():
283
  if x_var and y_var:
284
  plot_scatter_with_regression(st.session_state.filtered_data, x_var, y_var)
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  # 'λ‹€λ₯Έ 데이터 λΆ„μ„ν•˜κΈ°' λ²„νŠΌ μΆ”κ°€
287
  if st.button("λ‹€λ₯Έ 데이터 λΆ„μ„ν•˜κΈ°(였λ₯˜κ°€ λ‚˜λ©΄ λ‹€μ‹œ λˆŒλŸ¬μ£Όμ„Έμš”)"):
288
  reset_session_state()
289
  st.experimental_rerun()
290
-
291
  def main():
292
  st.title("λͺ¨λ‘κ°€ ν•  수 μžˆλŠ” 데이터 뢄석 νˆ΄ν‚·")
293
 
 
8
  import matplotlib.font_manager as fm
9
  from scipy import stats
10
  import os
11
+ import plotly.figure_factory as ff
12
 
13
  #μ‚¬μ΄μ¦ˆ 크게
14
  st.set_page_config(layout="wide")
 
178
  else:
179
  st.warning("상관관계 νžˆνŠΈλ§΅μ„ 그릴 수 μžˆλŠ” μˆ«μžν˜• 열이 μ—†μŠ΅λ‹ˆλ‹€.")
180
 
181
def check_normality(data, column):
    """Render a normal Q-Q plot and a Shapiro-Wilk test for one column.

    Output is written to the page through the module-level ``st`` and
    ``go`` objects (presumably ``streamlit`` and ``plotly.graph_objects``;
    their imports are outside this view -- confirm).

    Args:
        data: Table indexable by column name (a pandas DataFrame is
            assumed, since ``.dropna()`` is called on the selected column).
        column: Name of the numeric column to test.

    Returns:
        None. All results are rendered to the page.
    """
    # Missing values would propagate NaN into both the fitted Q-Q line and
    # the Shapiro-Wilk statistic, so only observed values are tested.
    values = data[column].dropna()

    # Shapiro-Wilk needs at least three observations; bail out with a
    # message instead of letting scipy fail on a heavily filtered dataset.
    if len(values) < 3:
        st.warning("정규성 검정에는 결측값을 제외한 관측값이 3개 이상 필요합니다.")
        return

    # Visual check: Q-Q plot. probplot returns the theoretical and ordered
    # sample quantiles plus the least-squares fit through them.
    (theoretical, ordered), (slope, intercept, _) = stats.probplot(values, dist="norm")
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=theoretical, y=ordered, mode='markers', name='Sample Quantiles'))
    fig.add_trace(go.Scatter(x=theoretical, y=slope * theoretical + intercept, mode='lines', name='Theoretical Quantiles'))
    fig.update_layout(title=f'Q-Q Plot for {column}', xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles')
    st.plotly_chart(fig)

    # Statistical check: Shapiro-Wilk test
    stat, p = stats.shapiro(values)
    st.write(f"Shapiro-Wilk Test for {column}:")
    st.write(f"ν†΅κ³„λŸ‰: {stat:.4f}")
    st.write(f"p-value: {p:.4f}")
    # The null hypothesis is normality, so a large p-value means
    # "no evidence against normality" at the 5% level.
    if p > 0.05:
        st.write("데이터가 μ •κ·œ 뢄포λ₯Ό λ”°λ₯΄λŠ” κ²ƒμœΌλ‘œ λ³΄μž…λ‹ˆλ‹€ (귀무가섀을 κΈ°κ°ν•˜μ§€ λͺ»ν•¨)")
    else:
        st.write("데이터가 μ •κ·œ 뢄포λ₯Ό λ”°λ₯΄μ§€ μ•ŠλŠ” κ²ƒμœΌλ‘œ λ³΄μž…λ‹ˆλ‹€ (귀무가섀 기각)")
199
+
200
def perform_independent_ttest(data, group_column, value_column):
    """Run an independent two-sample t-test and render the result.

    Output is written through the module-level ``st`` object (presumably
    ``streamlit``; its import is outside this view -- confirm).

    Args:
        data: Table indexable by column name (a pandas DataFrame is
            assumed, since ``.dropna(subset=...)`` is called on it).
        group_column: Column whose distinct values define the two groups.
        value_column: Numeric column compared between the groups.

    Returns:
        None. An error is rendered instead of a result when the data does
        not contain exactly two groups.
    """
    # Rows missing the group label or the value cannot take part in the
    # test: unique() would count a NaN label as an extra group, and a NaN
    # value makes ttest_ind return NaN under its default nan_policy.
    complete = data.dropna(subset=[group_column, value_column])

    groups = complete[group_column].unique()
    if len(groups) != 2:
        st.error("독립 ν‘œλ³Έ t-검정은 μ •ν™•νžˆ 두 그룹이 ν•„μš”ν•©λ‹ˆλ‹€.")
        return

    first_label, second_label = groups
    group1 = complete.loc[complete[group_column] == first_label, value_column]
    group2 = complete.loc[complete[group_column] == second_label, value_column]

    t_stat, p_value = stats.ttest_ind(group1, group2)

    st.write("독립 ν‘œλ³Έ T-κ²€μ • κ²°κ³Ό:")
    st.write(f"t-ν†΅κ³„λŸ‰: {t_stat:.4f}")
    st.write(f"p-value: {p_value:.4f}")

    # Two-sided decision at the 5% significance level.
    if p_value < 0.05:
        st.write("두 κ·Έλ£Ή 간에 μœ μ˜ν•œ 차이가 μžˆμŠ΅λ‹ˆλ‹€.")
    else:
        st.write("두 κ·Έλ£Ή 간에 μœ μ˜ν•œ 차이가 μ—†μŠ΅λ‹ˆλ‹€.")
219
+
220
def perform_paired_ttest(data, before_column, after_column):
    """Run a paired-sample t-test on two columns and render the result.

    Output is written through the module-level ``st`` object (presumably
    ``streamlit``; its import is outside this view -- confirm).

    Args:
        data: Table indexable by column name (a pandas DataFrame is
            assumed, since ``.dropna(subset=...)`` is called on it).
        before_column: Column holding the first measurement of each pair.
        after_column: Column holding the second measurement of each pair.

    Returns:
        None. All results are rendered to the page.
    """
    # Drop incomplete pairs row-wise so both series stay aligned; a NaN in
    # either column would otherwise make ttest_rel return NaN.
    pairs = data.dropna(subset=[before_column, after_column])

    t_stat, p_value = stats.ttest_rel(pairs[before_column], pairs[after_column])

    st.write("λŒ€μ‘ ν‘œλ³Έ T-κ²€μ • κ²°κ³Ό:")
    st.write(f"t-ν†΅κ³„λŸ‰: {t_stat:.4f}")
    st.write(f"p-value: {p_value:.4f}")

    # Two-sided decision at the 5% significance level.
    if p_value < 0.05:
        st.write("μ „ν›„ μΈ‘μ •κ°’ 간에 μœ μ˜ν•œ 차이가 μžˆμŠ΅λ‹ˆλ‹€.")
    else:
        st.write("μ „ν›„ μΈ‘μ •κ°’ 간에 μœ μ˜ν•œ 차이가 μ—†μŠ΅λ‹ˆλ‹€.")
231
+
232
def perform_onesample_ttest(data, column, test_value):
    """Run a one-sample t-test against a reference value and render it.

    Output is written through the module-level ``st`` object (presumably
    ``streamlit``; its import is outside this view -- confirm).

    Args:
        data: Table indexable by column name (a pandas DataFrame is
            assumed, since ``.dropna()`` is called on the selected column).
        column: Numeric column whose mean is tested.
        test_value: Hypothesised population mean to compare against.

    Returns:
        None. All results are rendered to the page.
    """
    # Missing values would make ttest_1samp return NaN under its default
    # nan_policy, so only observed values are tested.
    sample = data[column].dropna()

    t_stat, p_value = stats.ttest_1samp(sample, test_value)

    st.write("단일 ν‘œλ³Έ T-κ²€μ • κ²°κ³Ό:")
    st.write(f"t-ν†΅κ³„λŸ‰: {t_stat:.4f}")
    st.write(f"p-value: {p_value:.4f}")

    # Two-sided decision at the 5% significance level.
    if p_value < 0.05:
        st.write(f"ν‘œλ³Έ 평균이 {test_value}와 μœ μ˜ν•˜κ²Œ λ‹€λ¦…λ‹ˆλ‹€.")
    else:
        st.write(f"ν‘œλ³Έ 평균이 {test_value}와 μœ μ˜ν•˜κ²Œ λ‹€λ₯΄μ§€ μ•ŠμŠ΅λ‹ˆλ‹€.")
243
+
244
  def plot_scatter_with_regression(data, x_var, y_var):
245
  # νšŒκ·€ 뢄석 μˆ˜ν–‰
246
  x = data[x_var]
 
323
  # 데이터가 변경될 λ•Œλ§ˆλ‹€ ν•„ν„°λ§λœ 데이터 μ—…λ°μ΄νŠΈ
324
  st.session_state.filtered_data = apply_slicers(st.session_state.processed_data)
325
 
326
+ # 3μ—΄ λ ˆμ΄μ•„μ›ƒ 생성
327
+ col1, col2, col3 = st.columns(3)
 
328
 
329
  with col1:
 
330
  # μš”μ•½ 톡계
331
  st.write("μš”μ•½ 톡계:")
332
  st.write(st.session_state.filtered_data.describe())
 
336
  plot_correlation_heatmap(st.session_state.filtered_data)
337
 
338
  with col2:
 
339
  # μ‚¬μš©μžκ°€ μ„ νƒν•œ 두 λ³€μˆ˜μ— λŒ€ν•œ 산점도 및 νšŒκ·€ 뢄석
340
  st.subheader("두 λ³€μˆ˜ κ°„μ˜ 관계 뢄석")
341
  x_var = st.selectbox("XμΆ• λ³€μˆ˜ 선택", options=st.session_state.numeric_columns, key='x_var')
 
344
  if x_var and y_var:
345
  plot_scatter_with_regression(st.session_state.filtered_data, x_var, y_var)
346
 
347
+ with col3:
348
+ st.subheader("톡계적 κ²€μ •")
349
+
350
+ # μ •κ·œμ„± κ²€μ •
351
+ st.write("μ •κ·œμ„± κ²€μ •")
352
+ normality_column = st.selectbox("μ •κ·œμ„± 검정을 μˆ˜ν–‰ν•  μ—΄ 선택:", st.session_state.numeric_columns, key='normality_column')
353
+ if st.button("μ •κ·œμ„± κ²€μ • μˆ˜ν–‰"):
354
+ check_normality(st.session_state.filtered_data, normality_column)
355
+
356
+ # T-κ²€μ •
357
+ st.write("T-κ²€μ •")
358
+ test_type = st.radio("T-κ²€μ • μœ ν˜• 선택:", ["독립 ν‘œλ³Έ", "λŒ€μ‘ ν‘œλ³Έ", "단일 ν‘œλ³Έ"])
359
+
360
+ if test_type == "독립 ν‘œλ³Έ":
361
+ group_column = st.selectbox("κ·Έλ£Ή μ—΄ 선택:", st.session_state.categorical_columns)
362
+ value_column = st.selectbox("κ°’ μ—΄ 선택:", st.session_state.numeric_columns)
363
+ if st.button("독립 ν‘œλ³Έ T-κ²€μ • μˆ˜ν–‰"):
364
+ perform_independent_ttest(st.session_state.filtered_data, group_column, value_column)
365
+
366
+ elif test_type == "λŒ€μ‘ ν‘œλ³Έ":
367
+ before_column = st.selectbox("'이전' μ—΄ 선택:", st.session_state.numeric_columns)
368
+ after_column = st.selectbox("'이후' μ—΄ 선택:", [col for col in st.session_state.numeric_columns if col != before_column])
369
+ if st.button("λŒ€μ‘ ν‘œλ³Έ T-κ²€μ • μˆ˜ν–‰"):
370
+ perform_paired_ttest(st.session_state.filtered_data, before_column, after_column)
371
+
372
+ elif test_type == "단일 ν‘œλ³Έ":
373
+ test_column = st.selectbox("κ²€μ •ν•  μ—΄ 선택:", st.session_state.numeric_columns)
374
+ test_value = st.number_input("κ²€μ • κ°’ μž…λ ₯:")
375
+ if st.button("단일 ν‘œλ³Έ T-κ²€μ • μˆ˜ν–‰"):
376
+ perform_onesample_ttest(st.session_state.filtered_data, test_column, test_value)
377
+
378
  # 'λ‹€λ₯Έ 데이터 λΆ„μ„ν•˜κΈ°' λ²„νŠΌ μΆ”κ°€
379
  if st.button("λ‹€λ₯Έ 데이터 λΆ„μ„ν•˜κΈ°(였λ₯˜κ°€ λ‚˜λ©΄ λ‹€μ‹œ λˆŒλŸ¬μ£Όμ„Έμš”)"):
380
  reset_session_state()
381
  st.experimental_rerun()
 
382
  def main():
383
  st.title("λͺ¨λ‘κ°€ ν•  수 μžˆλŠ” 데이터 뢄석 νˆ΄ν‚·")
384