JUNGU committed on
Commit
71227fd
·
verified ·
1 Parent(s): d709da9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -96
app.py CHANGED
@@ -72,16 +72,38 @@ def preprocess_data(data):
72
 
73
  return data
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def perform_analysis(data):
76
  st.header("탐색적 데이터 뢄석")
77
 
 
 
 
 
 
 
78
  # μš”μ•½ 톡계
79
  st.write("μš”μ•½ 톡계:")
80
- st.write(data.describe())
81
 
82
  # 상관관계 히트맡
83
  st.write("상관관계 히트맡:")
84
- numeric_data = data.select_dtypes(include=['float64', 'int64'])
85
  if not numeric_data.empty:
86
  fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
87
  fig.update_layout(title='상관관계 히트맡')
@@ -89,28 +111,14 @@ def perform_analysis(data):
89
  else:
90
  st.write("상관관계 νžˆνŠΈλ§΅μ„ 그릴 수 μžˆλŠ” μˆ«μžν˜• 열이 μ—†μŠ΅λ‹ˆλ‹€.")
91
 
92
- # κ³Όλͺ©λ³„ 점수 뢄포
93
- if 'κ³Όλͺ©' in data.columns and 'ν•™μŠ΅ν‰κ°€' in data.columns:
94
- st.write("κ³Όλͺ©λ³„ 점수 뢄포:")
95
- fig = px.box(data, x='κ³Όλͺ©', y='ν•™μŠ΅ν‰κ°€', points="all")
96
- fig.update_layout(title='κ³Όλͺ©λ³„ ν•™μŠ΅ν‰κ°€ 점수 뢄포')
97
- st.plotly_chart(fig)
98
-
99
- # 월별 점수 좔이
100
- if '달' in data.columns and 'ν•™μŠ΅ν‰κ°€' in data.columns:
101
- st.write("월별 점수 좔이:")
102
- fig = px.line(data, x='달', y='ν•™μŠ΅ν‰κ°€', color='κ³Όλͺ©', markers=True)
103
- fig.update_layout(title='월별 ν•™μŠ΅ν‰κ°€ 점수 좔이')
104
- st.plotly_chart(fig)
105
-
106
- # μžκΈ°λ…Έλ ₯도와 ν•™μŠ΅ν‰κ°€ 관계 (νšŒκ·€μ„ κ³Ό R-squared μΆ”κ°€)
107
- if 'μžκΈ°λ…Έλ ₯도' in data.columns and 'ν•™μŠ΅ν‰κ°€' in data.columns:
108
- st.write("μžκΈ°λ…Έλ ₯도와 ν•™μŠ΅ν‰κ°€ 관계:")
109
- fig = px.scatter(data, x='μžκΈ°λ…Έλ ₯도', y='ν•™μŠ΅ν‰κ°€', color='κ³Όλͺ©', hover_data=['달'])
110
 
111
  # 전체 데이터에 λŒ€ν•œ νšŒκ·€μ„  μΆ”κ°€
112
- x = data['μžκΈ°λ…Έλ ₯도']
113
- y = data['ν•™μŠ΅ν‰κ°€']
114
  slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
115
  line_x = np.array([x.min(), x.max()])
116
  line_y = slope * line_x + intercept
@@ -118,7 +126,7 @@ def perform_analysis(data):
118
 
119
  r_squared = r_value ** 2
120
  fig.update_layout(
121
- title=f'μžκΈ°λ…Έλ ₯도와 ν•™μŠ΅ν‰κ°€ 관계 (R-squared: {r_squared:.4f})',
122
  annotations=[
123
  dict(
124
  x=0.5,
@@ -132,79 +140,20 @@ def perform_analysis(data):
132
  )
133
  st.plotly_chart(fig)
134
 
135
- # μΈν„°λž™ν‹°λΈŒ 필터링
136
- st.write("μΈν„°λž™ν‹°λΈŒ 필터링:")
137
- if 'μžκΈ°λ…Έλ ₯도' in data.columns:
138
- min_effort = int(data['μžκΈ°λ…Έλ ₯도'].min())
139
- max_effort = int(data['μžκΈ°λ…Έλ ₯도'].max())
140
- effort_range = st.slider("μžκΈ°λ…Έλ ₯도 λ²”μœ„ 선택", min_effort, max_effort, (min_effort, max_effort))
141
-
142
- filtered_data = data[(data['μžκΈ°λ…Έλ ₯도'] >= effort_range[0]) & (data['μžκΈ°λ…Έλ ₯도'] <= effort_range[1])]
143
-
144
- if 'κ³Όλͺ©' in filtered_data.columns and 'ν•™μŠ΅ν‰κ°€' in filtered_data.columns:
145
- fig = px.scatter(filtered_data, x='μžκΈ°λ…Έλ ₯도', y='ν•™μŠ΅ν‰κ°€', color='κ³Όλͺ©', hover_data=['달'])
146
-
147
- # ν•„ν„°λ§λœ 데이터에 λŒ€ν•œ νšŒκ·€μ„  μΆ”κ°€
148
- x = filtered_data['μžκΈ°λ…Έλ ₯도']
149
- y = filtered_data['ν•™μŠ΅ν‰κ°€']
150
- slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
151
- line_x = np.array([x.min(), x.max()])
152
- line_y = slope * line_x + intercept
153
- fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νšŒκ·€μ„ '))
154
-
155
- r_squared = r_value ** 2
156
- fig.update_layout(
157
- title=f'μžκΈ°λ…Έλ ₯도 {effort_range[0]}-{effort_range[1]} λ²”μœ„μ˜ ν•™μŠ΅ν‰κ°€ 관계 (R-squared: {r_squared:.4f})',
158
- annotations=[
159
- dict(
160
- x=0.5,
161
- y=1.05,
162
- xref='paper',
163
- yref='paper',
164
- text=f'R-squared: {r_squared:.4f}',
165
- showarrow=False,
166
- )
167
- ]
168
- )
169
- st.plotly_chart(fig)
170
-
171
- # κ³Όλͺ©λ³„ 상세 뢄석
172
- if 'κ³Όλͺ©' in data.columns:
173
- st.write("κ³Όλͺ©λ³„ 상세 뢄석:")
174
- selected_subject = st.selectbox("뢄석할 κ³Όλͺ© 선택", data['κ³Όλͺ©'].unique())
175
- subject_data = data[data['κ³Όλͺ©'] == selected_subject]
176
-
177
- if '달' in subject_data.columns and 'ν•™μŠ΅ν‰κ°€' in subject_data.columns:
178
- fig = px.line(subject_data, x='달', y='ν•™μŠ΅ν‰κ°€', markers=True)
179
- fig.update_layout(title=f'{selected_subject} 월별 ν•™μŠ΅ν‰κ°€ 점수 좔이')
180
- st.plotly_chart(fig)
181
-
182
- if 'μžκΈ°λ…Έλ ₯도' in subject_data.columns and 'ν•™μŠ΅ν‰κ°€' in subject_data.columns:
183
- fig = px.scatter(subject_data, x='μžκΈ°λ…Έλ ₯도', y='ν•™μŠ΅ν‰κ°€', hover_data=['달'])
184
-
185
- # μ„ νƒλœ κ³Όλͺ©μ— λŒ€ν•œ νšŒκ·€μ„  μΆ”κ°€
186
- x = subject_data['μžκΈ°λ…Έλ ₯도']
187
- y = subject_data['ν•™μŠ΅ν‰κ°€']
188
- slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
189
- line_x = np.array([x.min(), x.max()])
190
- line_y = slope * line_x + intercept
191
- fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νšŒκ·€μ„ '))
192
-
193
- r_squared = r_value ** 2
194
- fig.update_layout(
195
- title=f'{selected_subject} μžκΈ°λ…Έλ ₯도와 ν•™μŠ΅ν‰κ°€ 관계 (R-squared: {r_squared:.4f})',
196
- annotations=[
197
- dict(
198
- x=0.5,
199
- y=1.05,
200
- xref='paper',
201
- yref='paper',
202
- text=f'R-squared: {r_squared:.4f}',
203
- showarrow=False,
204
- )
205
- ]
206
- )
207
- st.plotly_chart(fig)
208
 
209
  def main():
210
  st.title("μΈν„°λž™ν‹°λΈŒ EDA νˆ΄ν‚·")
 
72
 
73
  return data
74
 
75
def create_slicers(data, max_unique=10):
    """Build one Streamlit multiselect per low-cardinality categorical column.

    Columns of dtype ``object`` or ``category`` are considered. A slicer is
    created only when the column has at most ``max_unique`` distinct
    non-missing values, so the widget stays usable.

    Args:
        data: DataFrame whose categorical columns should become slicers.
        max_unique: Largest number of distinct values for which a slicer is
            still rendered. Defaults to 10, the previously hard-coded limit.

    Returns:
        dict mapping column name to the list of values currently selected in
        that column's multiselect widget (all options are selected initially).
    """
    slicers = {}
    categorical_columns = data.select_dtypes(include=['object', 'category']).columns

    for col in categorical_columns:
        # nunique() ignores missing values by default, matching the options below.
        if data[col].nunique() > max_unique:
            continue

        # Missing values are dropped before sorting: comparing NaN (a float)
        # with strings makes sorted() raise TypeError.
        values = list(data[col].dropna().unique())
        try:
            options = sorted(values)
        except TypeError:
            # Mixed, mutually unorderable types: fall back to a stable text order.
            options = sorted(values, key=str)

        slicers[col] = st.multiselect(f"{col} 선택", options=options, default=options)

    return slicers
84
+
85
def apply_slicers(data, slicers):
    """Filter ``data`` down to the rows matching every active slicer.

    Args:
        data: DataFrame to filter.
        slicers: Mapping of column name to the collection of selected values.
            An empty selection leaves that column unfiltered.

    Returns:
        A new DataFrame holding only rows whose value in each sliced column is
        among that column's selected values. The result is always an
        independent copy, so callers may add or modify columns on it without
        touching ``data`` and without pandas chained-assignment warnings.

    Raises:
        KeyError: If a slicer refers to a column that ``data`` does not have.
    """
    filtered = data
    for col, selected_values in slicers.items():
        if not selected_values:
            # Nothing selected for this column: treat as "no filter".
            continue
        filtered = filtered[filtered[col].isin(selected_values)]

    # Copy even when nothing was filtered, so the caller never receives the
    # input frame itself or a slice that still refers back to it.
    return filtered.copy()
90
+
91
  def perform_analysis(data):
92
  st.header("탐색적 데이터 뢄석")
93
 
94
+ # μŠ¬λΌμ΄μ„œ 생성
95
+ slicers = create_slicers(data)
96
+
97
+ # μŠ¬λΌμ΄μ„œ 적용
98
+ filtered_data = apply_slicers(data, slicers)
99
+
100
  # μš”μ•½ 톡계
101
  st.write("μš”μ•½ 톡계:")
102
+ st.write(filtered_data.describe())
103
 
104
  # 상관관계 히트맡
105
  st.write("상관관계 히트맡:")
106
+ numeric_data = filtered_data.select_dtypes(include=['float64', 'int64'])
107
  if not numeric_data.empty:
108
  fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
109
  fig.update_layout(title='상관관계 히트맡')
 
111
  else:
112
  st.write("상관관계 νžˆνŠΈλ§΅μ„ 그릴 수 μžˆλŠ” μˆ«μžν˜• 열이 μ—†μŠ΅λ‹ˆλ‹€.")
113
 
114
+ # μΆœμ„μΌμˆ˜μ™€ 성적 관계 뢄석
115
+ if 'μΆœμ„μΌμˆ˜' in filtered_data.columns and '성적' in filtered_data.columns:
116
+ st.write("μΆœμ„μΌμˆ˜μ™€ 성적 관계:")
117
+ fig = px.scatter(filtered_data, x='μΆœμ„μΌμˆ˜', y='성적', color='반', hover_data=filtered_data.columns)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  # 전체 데이터에 λŒ€ν•œ νšŒκ·€μ„  μΆ”κ°€
120
+ x = filtered_data['μΆœμ„μΌμˆ˜']
121
+ y = filtered_data['성적']
122
  slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
123
  line_x = np.array([x.min(), x.max()])
124
  line_y = slope * line_x + intercept
 
126
 
127
  r_squared = r_value ** 2
128
  fig.update_layout(
129
+ title=f'μΆœμ„μΌμˆ˜μ™€ 성적 관계 (R-squared: {r_squared:.4f})',
130
  annotations=[
131
  dict(
132
  x=0.5,
 
140
  )
141
  st.plotly_chart(fig)
142
 
143
+ # λ°˜λ³„ 성적 뢄포
144
+ if '반' in filtered_data.columns and '성적' in filtered_data.columns:
145
+ st.write("λ°˜λ³„ 성적 뢄포:")
146
+ fig = px.box(filtered_data, x='반', y='성적', points="all")
147
+ fig.update_layout(title='λ°˜λ³„ 성적 뢄포')
148
+ st.plotly_chart(fig)
149
+
150
+ # μΆœμ„μΌμˆ˜ ꡬ간별 성적 뢄포
151
+ if 'μΆœμ„μΌμˆ˜' in filtered_data.columns and '성적' in filtered_data.columns:
152
+ st.write("μΆœμ„μΌμˆ˜ ꡬ간별 성적 뢄포:")
153
+ filtered_data['μΆœμ„μΌμˆ˜_ꡬ간'] = pd.cut(filtered_data['μΆœμ„μΌμˆ˜'], bins=5)
154
+ fig = px.box(filtered_data, x='μΆœμ„μΌμˆ˜_ꡬ간', y='성적', color='반')
155
+ fig.update_layout(title='μΆœμ„μΌμˆ˜ ꡬ간별 성적 뢄포')
156
+ st.plotly_chart(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  def main():
159
  st.title("μΈν„°λž™ν‹°λΈŒ EDA νˆ΄ν‚·")