Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import openpyxl
|
|
8 |
import matplotlib.font_manager as fm
|
9 |
from scipy import stats
|
10 |
import os
|
|
|
11 |
|
12 |
# 사이즈 크게 (use the wide page layout)
|
13 |
st.set_page_config(layout="wide")
|
@@ -177,6 +178,69 @@ def plot_correlation_heatmap(data):
|
|
177 |
else:
|
178 |
st.warning("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")
|
179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
def plot_scatter_with_regression(data, x_var, y_var):
|
181 |
# νκ· λΆμ μν
|
182 |
x = data[x_var]
|
@@ -259,12 +323,10 @@ def perform_analysis():
|
|
259 |
# λ°μ΄ν°κ° λ³κ²½λ λλ§λ€ νν°λ§λ λ°μ΄ν° μ
λ°μ΄νΈ
|
260 |
st.session_state.filtered_data = apply_slicers(st.session_state.processed_data)
|
261 |
|
262 |
-
#
|
263 |
-
col1, col2 = st.columns(
|
264 |
-
|
265 |
|
266 |
with col1:
|
267 |
-
|
268 |
# μμ½ ν΅κ³
|
269 |
st.write("μμ½ ν΅κ³:")
|
270 |
st.write(st.session_state.filtered_data.describe())
|
@@ -274,7 +336,6 @@ def perform_analysis():
|
|
274 |
plot_correlation_heatmap(st.session_state.filtered_data)
|
275 |
|
276 |
with col2:
|
277 |
-
|
278 |
# μ¬μ©μκ° μ νν λ λ³μμ λν μ°μ λ λ° νκ· λΆμ
|
279 |
st.subheader("λ λ³μ κ°μ κ΄κ³ λΆμ")
|
280 |
x_var = st.selectbox("XμΆ λ³μ μ ν", options=st.session_state.numeric_columns, key='x_var')
|
@@ -283,11 +344,41 @@ def perform_analysis():
|
|
283 |
if x_var and y_var:
|
284 |
plot_scatter_with_regression(st.session_state.filtered_data, x_var, y_var)
|
285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
# 'λ€λ₯Έ λ°μ΄ν° λΆμνκΈ°' λ²νΌ μΆκ°
|
287 |
if st.button("λ€λ₯Έ λ°μ΄ν° λΆμνκΈ°(μ€λ₯κ° λλ©΄ λ€μ λλ¬μ£ΌμΈμ)"):
|
288 |
reset_session_state()
|
289 |
st.experimental_rerun()
|
290 |
-
|
291 |
def main():
|
292 |
st.title("λͺ¨λκ° ν μ μλ λ°μ΄ν° λΆμ ν΄ν·")
|
293 |
|
|
|
8 |
import matplotlib.font_manager as fm
|
9 |
from scipy import stats
|
10 |
import os
|
11 |
+
import plotly.figure_factory as ff
|
12 |
|
13 |
# 사이즈 크게 (use the wide page layout)
|
14 |
st.set_page_config(layout="wide")
|
|
|
178 |
else:
|
179 |
st.warning("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")
|
180 |
|
181 |
+
def check_normality(data, column):
    """Show a normal Q-Q plot and a Shapiro-Wilk test for one column.

    Missing values are dropped before plotting and testing. With fewer than
    three remaining observations a warning is shown instead, because
    ``scipy.stats.shapiro`` requires at least three data points.

    Args:
        data: Table indexed by column name (presumably a pandas DataFrame --
            confirm against the caller).
        column: Name of the numeric column to check.

    Returns:
        None. All output is rendered through Streamlit.
    """
    # Imported here so the function does not rely on a module-level ``go``
    # alias being defined elsewhere in the file.
    import plotly.graph_objects as go

    sample = data[column].dropna()
    if len(sample) < 3:
        st.warning("정규성 검정에는 결측값이 아닌 관측값이 3개 이상 필요합니다.")
        return

    # Visual check: Q-Q plot against the normal distribution.
    (theoretical, ordered), (slope, intercept, _r) = stats.probplot(sample, dist="norm")
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=theoretical, y=ordered, mode='markers', name='Sample Quantiles'))
    # Reference line from the least-squares fit that probplot returns.
    fig.add_trace(go.Scatter(x=theoretical, y=slope * theoretical + intercept, mode='lines', name='Theoretical Quantiles'))
    fig.update_layout(title=f'Q-Q Plot for {column}', xaxis_title='Theoretical Quantiles', yaxis_title='Sample Quantiles')
    st.plotly_chart(fig)

    # Statistical check: Shapiro-Wilk test (null hypothesis: data are normal).
    stat, p = stats.shapiro(sample)
    st.write(f"Shapiro-Wilk Test for {column}:")
    st.write(f"통계량: {stat:.4f}")
    st.write(f"p-value: {p:.4f}")
    if p > 0.05:
        st.write("데이터가 정규 분포를 따르는 것으로 보입니다 (귀무가설을 기각하지 못함)")
    else:
        st.write("데이터가 정규 분포를 따르지 않는 것으로 보입니다 (귀무가설 기각)")
|
199 |
+
|
200 |
+
def perform_independent_ttest(data, group_column, value_column):
    """Run an independent two-sample t-test and report it via Streamlit.

    The rows are split by the two distinct non-missing labels found in
    ``group_column``; missing values in ``value_column`` are dropped from
    each group before testing.

    Args:
        data: Table indexed by column name and filterable with a boolean
            mask (presumably a pandas DataFrame -- confirm against the
            caller).
        group_column: Name of the column holding the group labels.
        value_column: Name of the numeric column to compare.

    Returns:
        None. An error is shown when there are not exactly two groups, and
        a warning when the statistic cannot be computed.
    """
    import math

    # A missing label is not a group of its own.
    groups = data[group_column].dropna().unique()
    if len(groups) != 2:
        st.error("독립 표본 t-검정은 정확히 두 그룹이 필요합니다.")
        return

    group1 = data.loc[data[group_column] == groups[0], value_column].dropna()
    group2 = data.loc[data[group_column] == groups[1], value_column].dropna()

    t_stat, p_value = stats.ttest_ind(group1, group2)

    # A NaN p-value (too few values, zero variance) must not be reported as
    # "no significant difference".
    if math.isnan(p_value):
        st.warning("검정 통계량을 계산할 수 없습니다 (유효한 데이터가 부족하거나 분산이 0입니다).")
        return

    st.write("독립 표본 T-검정 결과:")
    st.write(f"t-통계량: {t_stat:.4f}")
    st.write(f"p-value: {p_value:.4f}")

    if p_value < 0.05:
        st.write("두 그룹 간에 유의한 차이가 있습니다.")
    else:
        st.write("두 그룹 간에 유의한 차이가 없습니다.")
|
219 |
+
|
220 |
+
def perform_paired_ttest(data, before_column, after_column):
    """Run a paired-sample t-test on two columns and report it via Streamlit.

    Only rows where both measurements are present are used, so the two
    samples stay aligned pair by pair.

    Args:
        data: Table indexed by column name (presumably a pandas DataFrame --
            confirm against the caller).
        before_column: Name of the column with the first measurement.
        after_column: Name of the column with the second measurement.

    Returns:
        None. A warning is shown when the statistic cannot be computed.
    """
    import math

    before = data[before_column]
    after = data[after_column]
    # Drop incomplete pairs row-wise; dropping per column would misalign them.
    complete = before.notna() & after.notna()

    t_stat, p_value = stats.ttest_rel(before[complete], after[complete])

    # A NaN p-value must not be reported as "no significant difference".
    if math.isnan(p_value):
        st.warning("검정 통계량을 계산할 수 없습니다 (유효한 데이터가 부족하거나 분산이 0입니다).")
        return

    st.write("대응 표본 T-검정 결과:")
    st.write(f"t-통계량: {t_stat:.4f}")
    st.write(f"p-value: {p_value:.4f}")

    if p_value < 0.05:
        st.write("전후 측정값 간에 유의한 차이가 있습니다.")
    else:
        st.write("전후 측정값 간에 유의한 차이가 없습니다.")
|
231 |
+
|
232 |
+
def perform_onesample_ttest(data, column, test_value):
    """Run a one-sample t-test against a reference value and report it.

    Missing values in the column are dropped before testing.

    Args:
        data: Table indexed by column name (presumably a pandas DataFrame --
            confirm against the caller).
        column: Name of the numeric column to test.
        test_value: Hypothesised population mean to compare against.

    Returns:
        None. A warning is shown when the statistic cannot be computed.
    """
    import math

    sample = data[column].dropna()
    t_stat, p_value = stats.ttest_1samp(sample, test_value)

    # A NaN p-value must not be reported as "not significantly different".
    if math.isnan(p_value):
        st.warning("검정 통계량을 계산할 수 없습니다 (유효한 데이터가 부족하거나 분산이 0입니다).")
        return

    st.write("단일 표본 T-검정 결과:")
    st.write(f"t-통계량: {t_stat:.4f}")
    st.write(f"p-value: {p_value:.4f}")

    if p_value < 0.05:
        st.write(f"표본 평균이 {test_value}와 유의하게 다릅니다.")
    else:
        st.write(f"표본 평균이 {test_value}와 유의하게 다르지 않습니다.")
|
243 |
+
|
244 |
def plot_scatter_with_regression(data, x_var, y_var):
|
245 |
# νκ· λΆμ μν
|
246 |
x = data[x_var]
|
|
|
323 |
# λ°μ΄ν°κ° λ³κ²½λ λλ§λ€ νν°λ§λ λ°μ΄ν° μ
λ°μ΄νΈ
|
324 |
st.session_state.filtered_data = apply_slicers(st.session_state.processed_data)
|
325 |
|
326 |
+
# 3μ΄ λ μ΄μμ μμ±
|
327 |
+
col1, col2, col3 = st.columns(3)
|
|
|
328 |
|
329 |
with col1:
|
|
|
330 |
# μμ½ ν΅κ³
|
331 |
st.write("μμ½ ν΅κ³:")
|
332 |
st.write(st.session_state.filtered_data.describe())
|
|
|
336 |
plot_correlation_heatmap(st.session_state.filtered_data)
|
337 |
|
338 |
with col2:
|
|
|
339 |
# μ¬μ©μκ° μ νν λ λ³μμ λν μ°μ λ λ° νκ· λΆμ
|
340 |
st.subheader("λ λ³μ κ°μ κ΄κ³ λΆμ")
|
341 |
x_var = st.selectbox("XμΆ λ³μ μ ν", options=st.session_state.numeric_columns, key='x_var')
|
|
|
344 |
if x_var and y_var:
|
345 |
plot_scatter_with_regression(st.session_state.filtered_data, x_var, y_var)
|
346 |
|
347 |
+
with col3:
|
348 |
+
st.subheader("ν΅κ³μ κ²μ ")
|
349 |
+
|
350 |
+
# μ κ·μ± κ²μ
|
351 |
+
st.write("μ κ·μ± κ²μ ")
|
352 |
+
normality_column = st.selectbox("μ κ·μ± κ²μ μ μνν μ΄ μ ν:", st.session_state.numeric_columns, key='normality_column')
|
353 |
+
if st.button("μ κ·μ± κ²μ μν"):
|
354 |
+
check_normality(st.session_state.filtered_data, normality_column)
|
355 |
+
|
356 |
+
# T-κ²μ
|
357 |
+
st.write("T-κ²μ ")
|
358 |
+
test_type = st.radio("T-κ²μ μ ν μ ν:", ["λ
립 νλ³Έ", "λμ νλ³Έ", "λ¨μΌ νλ³Έ"])
|
359 |
+
|
360 |
+
if test_type == "λ
립 νλ³Έ":
|
361 |
+
group_column = st.selectbox("κ·Έλ£Ή μ΄ μ ν:", st.session_state.categorical_columns)
|
362 |
+
value_column = st.selectbox("κ° μ΄ μ ν:", st.session_state.numeric_columns)
|
363 |
+
if st.button("λ
립 νλ³Έ T-κ²μ μν"):
|
364 |
+
perform_independent_ttest(st.session_state.filtered_data, group_column, value_column)
|
365 |
+
|
366 |
+
elif test_type == "λμ νλ³Έ":
|
367 |
+
before_column = st.selectbox("'μ΄μ ' μ΄ μ ν:", st.session_state.numeric_columns)
|
368 |
+
after_column = st.selectbox("'μ΄ν' μ΄ μ ν:", [col for col in st.session_state.numeric_columns if col != before_column])
|
369 |
+
if st.button("λμ νλ³Έ T-κ²μ μν"):
|
370 |
+
perform_paired_ttest(st.session_state.filtered_data, before_column, after_column)
|
371 |
+
|
372 |
+
elif test_type == "λ¨μΌ νλ³Έ":
|
373 |
+
test_column = st.selectbox("κ²μ ν μ΄ μ ν:", st.session_state.numeric_columns)
|
374 |
+
test_value = st.number_input("κ²μ κ° μ
λ ₯:")
|
375 |
+
if st.button("λ¨μΌ νλ³Έ T-κ²μ μν"):
|
376 |
+
perform_onesample_ttest(st.session_state.filtered_data, test_column, test_value)
|
377 |
+
|
378 |
# 'λ€λ₯Έ λ°μ΄ν° λΆμνκΈ°' λ²νΌ μΆκ°
|
379 |
if st.button("λ€λ₯Έ λ°μ΄ν° λΆμνκΈ°(μ€λ₯κ° λλ©΄ λ€μ λλ¬μ£ΌμΈμ)"):
|
380 |
reset_session_state()
|
381 |
st.experimental_rerun()
|
|
|
382 |
def main():
|
383 |
st.title("λͺ¨λκ° ν μ μλ λ°μ΄ν° λΆμ ν΄ν·")
|
384 |
|