Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -72,16 +72,38 @@ def preprocess_data(data):
|
|
72 |
|
73 |
return data
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
def perform_analysis(data):
|
76 |
st.header("νμμ λ°μ΄ν° λΆμ")
|
77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
# μμ½ ν΅κ³
|
79 |
st.write("μμ½ ν΅κ³:")
|
80 |
-
st.write(
|
81 |
|
82 |
# μκ΄κ΄κ³ ννΈλ§΅
|
83 |
st.write("μκ΄κ΄κ³ ννΈλ§΅:")
|
84 |
-
numeric_data =
|
85 |
if not numeric_data.empty:
|
86 |
fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
|
87 |
fig.update_layout(title='μκ΄κ΄κ³ ννΈλ§΅')
|
@@ -89,28 +111,14 @@ def perform_analysis(data):
|
|
89 |
else:
|
90 |
st.write("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")
|
91 |
|
92 |
-
#
|
93 |
-
if '
|
94 |
-
st.write("
|
95 |
-
fig = px.
|
96 |
-
fig.update_layout(title='κ³Όλͺ©λ³ νμ΅νκ° μ μ λΆν¬')
|
97 |
-
st.plotly_chart(fig)
|
98 |
-
|
99 |
-
# μλ³ μ μ μΆμ΄
|
100 |
-
if 'λ¬' in data.columns and 'νμ΅νκ°' in data.columns:
|
101 |
-
st.write("μλ³ μ μ μΆμ΄:")
|
102 |
-
fig = px.line(data, x='λ¬', y='νμ΅νκ°', color='κ³Όλͺ©', markers=True)
|
103 |
-
fig.update_layout(title='μλ³ νμ΅νκ° μ μ μΆμ΄')
|
104 |
-
st.plotly_chart(fig)
|
105 |
-
|
106 |
-
# μκΈ°λ
Έλ ₯λμ νμ΅νκ° κ΄κ³ (νκ·μ κ³Ό R-squared μΆκ°)
|
107 |
-
if 'μκΈ°λ
Έλ ₯λ' in data.columns and 'νμ΅νκ°' in data.columns:
|
108 |
-
st.write("μκΈ°λ
Έλ ₯λμ νμ΅νκ° κ΄κ³:")
|
109 |
-
fig = px.scatter(data, x='μκΈ°λ
Έλ ₯λ', y='νμ΅νκ°', color='κ³Όλͺ©', hover_data=['λ¬'])
|
110 |
|
111 |
# μ 체 λ°μ΄ν°μ λν νκ·μ μΆκ°
|
112 |
-
x =
|
113 |
-
y =
|
114 |
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
115 |
line_x = np.array([x.min(), x.max()])
|
116 |
line_y = slope * line_x + intercept
|
@@ -118,7 +126,7 @@ def perform_analysis(data):
|
|
118 |
|
119 |
r_squared = r_value ** 2
|
120 |
fig.update_layout(
|
121 |
-
title=f'
|
122 |
annotations=[
|
123 |
dict(
|
124 |
x=0.5,
|
@@ -132,79 +140,20 @@ def perform_analysis(data):
|
|
132 |
)
|
133 |
st.plotly_chart(fig)
|
134 |
|
135 |
-
#
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
y = filtered_data['νμ΅νκ°']
|
150 |
-
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
151 |
-
line_x = np.array([x.min(), x.max()])
|
152 |
-
line_y = slope * line_x + intercept
|
153 |
-
fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νκ·μ '))
|
154 |
-
|
155 |
-
r_squared = r_value ** 2
|
156 |
-
fig.update_layout(
|
157 |
-
title=f'μκΈ°λ
Έλ ₯λ {effort_range[0]}-{effort_range[1]} λ²μμ νμ΅νκ° κ΄κ³ (R-squared: {r_squared:.4f})',
|
158 |
-
annotations=[
|
159 |
-
dict(
|
160 |
-
x=0.5,
|
161 |
-
y=1.05,
|
162 |
-
xref='paper',
|
163 |
-
yref='paper',
|
164 |
-
text=f'R-squared: {r_squared:.4f}',
|
165 |
-
showarrow=False,
|
166 |
-
)
|
167 |
-
]
|
168 |
-
)
|
169 |
-
st.plotly_chart(fig)
|
170 |
-
|
171 |
-
# κ³Όλͺ©λ³ μμΈ λΆμ
|
172 |
-
if 'κ³Όλͺ©' in data.columns:
|
173 |
-
st.write("κ³Όλͺ©λ³ μμΈ λΆμ:")
|
174 |
-
selected_subject = st.selectbox("λΆμν κ³Όλͺ© μ ν", data['κ³Όλͺ©'].unique())
|
175 |
-
subject_data = data[data['κ³Όλͺ©'] == selected_subject]
|
176 |
-
|
177 |
-
if 'λ¬' in subject_data.columns and 'νμ΅νκ°' in subject_data.columns:
|
178 |
-
fig = px.line(subject_data, x='λ¬', y='νμ΅νκ°', markers=True)
|
179 |
-
fig.update_layout(title=f'{selected_subject} μλ³ νμ΅νκ° μ μ μΆμ΄')
|
180 |
-
st.plotly_chart(fig)
|
181 |
-
|
182 |
-
if 'μκΈ°λ
Έλ ₯λ' in subject_data.columns and 'νμ΅νκ°' in subject_data.columns:
|
183 |
-
fig = px.scatter(subject_data, x='μκΈ°λ
Έλ ₯λ', y='νμ΅νκ°', hover_data=['λ¬'])
|
184 |
-
|
185 |
-
# μ νλ κ³Όλͺ©μ λν νκ·μ μΆκ°
|
186 |
-
x = subject_data['μκΈ°λ
Έλ ₯λ']
|
187 |
-
y = subject_data['νμ΅νκ°']
|
188 |
-
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
189 |
-
line_x = np.array([x.min(), x.max()])
|
190 |
-
line_y = slope * line_x + intercept
|
191 |
-
fig.add_trace(go.Scatter(x=line_x, y=line_y, mode='lines', name='νκ·μ '))
|
192 |
-
|
193 |
-
r_squared = r_value ** 2
|
194 |
-
fig.update_layout(
|
195 |
-
title=f'{selected_subject} μκΈ°λ
Έλ ₯λμ νμ΅νκ° κ΄κ³ (R-squared: {r_squared:.4f})',
|
196 |
-
annotations=[
|
197 |
-
dict(
|
198 |
-
x=0.5,
|
199 |
-
y=1.05,
|
200 |
-
xref='paper',
|
201 |
-
yref='paper',
|
202 |
-
text=f'R-squared: {r_squared:.4f}',
|
203 |
-
showarrow=False,
|
204 |
-
)
|
205 |
-
]
|
206 |
-
)
|
207 |
-
st.plotly_chart(fig)
|
208 |
|
209 |
def main():
|
210 |
st.title("μΈν°λν°λΈ EDA ν΄ν·")
|
|
|
72 |
|
73 |
return data
|
74 |
|
75 |
+
def create_slicers(data):
    """Build one Streamlit multiselect per low-cardinality categorical column.

    Every column of dtype ``object`` or ``category`` whose ``nunique()`` is
    at most 10 gets an ``st.multiselect`` widget. The widget's options are
    the column's unique values, and all of them are selected by default.

    Args:
        data: DataFrame the slicers are derived from.

    Returns:
        dict mapping each qualifying column name to the value returned by
        ``st.multiselect`` for that column.
    """
    slicers = {}
    categorical_columns = data.select_dtypes(include=['object', 'category']).columns

    for col in categorical_columns:
        # Only offer a slicer when the column has 10 or fewer unique values.
        if data[col].nunique() > 10:
            continue

        values = data[col].unique()
        try:
            options = sorted(values)
        except TypeError:
            # unique() keeps missing values, so a string column with gaps
            # mixes str and float and cannot be ordered directly; fall back
            # to ordering by text form instead of crashing the app.
            options = sorted(values, key=str)

        # NOTE(review): the label text was mis-decoded in the reviewed copy
        # and is reconstructed here as "선택" ("select") -- confirm against
        # the original source file.
        slicers[col] = st.multiselect(
            f"{col} 선택",
            options=options,
            default=list(options),
        )

    return slicers
|
84 |
+
|
85 |
+
def apply_slicers(data, slicers):
    """Filter ``data`` down to the rows matching every active slicer.

    A slicer is active when its list of selected values is non-empty; an
    empty selection leaves that column unfiltered. Rows are kept only when
    they match all active slicers.

    Args:
        data: DataFrame to filter.
        slicers: dict mapping column name to the values selected for it.

    Returns:
        A new DataFrame holding the matching rows. It is always an
        independent copy, so callers may add or modify columns on the
        result without touching ``data`` and without chained-assignment
        warnings.
    """
    mask = None
    for col, selected_values in slicers.items():
        if not selected_values:
            continue
        col_mask = data[col].isin(selected_values)
        # Combine per-column masks so the frame is indexed only once.
        mask = col_mask if mask is None else mask & col_mask

    if mask is None:
        # No active slicer: still hand back a copy, never the caller's frame.
        return data.copy()
    return data[mask].copy()
|
90 |
+
|
91 |
def perform_analysis(data):
|
92 |
st.header("νμμ λ°μ΄ν° λΆμ")
|
93 |
|
94 |
+
# μ¬λΌμ΄μ μμ±
|
95 |
+
slicers = create_slicers(data)
|
96 |
+
|
97 |
+
# μ¬λΌμ΄μ μ μ©
|
98 |
+
filtered_data = apply_slicers(data, slicers)
|
99 |
+
|
100 |
# μμ½ ν΅κ³
|
101 |
st.write("μμ½ ν΅κ³:")
|
102 |
+
st.write(filtered_data.describe())
|
103 |
|
104 |
# μκ΄κ΄κ³ ννΈλ§΅
|
105 |
st.write("μκ΄κ΄κ³ ννΈλ§΅:")
|
106 |
+
numeric_data = filtered_data.select_dtypes(include=['float64', 'int64'])
|
107 |
if not numeric_data.empty:
|
108 |
fig = px.imshow(numeric_data.corr(), color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
|
109 |
fig.update_layout(title='μκ΄κ΄κ³ ννΈλ§΅')
|
|
|
111 |
else:
|
112 |
st.write("μκ΄κ΄κ³ ννΈλ§΅μ 그릴 μ μλ μ«μν μ΄μ΄ μμ΅λλ€.")
|
113 |
|
114 |
+
# μΆμμΌμμ μ±μ κ΄κ³ λΆμ
|
115 |
+
if 'μΆμμΌμ' in filtered_data.columns and 'μ±μ ' in filtered_data.columns:
|
116 |
+
st.write("μΆμμΌμμ μ±μ κ΄κ³:")
|
117 |
+
fig = px.scatter(filtered_data, x='μΆμμΌμ', y='μ±μ ', color='λ°', hover_data=filtered_data.columns)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
# μ 체 λ°μ΄ν°μ λν νκ·μ μΆκ°
|
120 |
+
x = filtered_data['μΆμμΌμ']
|
121 |
+
y = filtered_data['μ±μ ']
|
122 |
slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
|
123 |
line_x = np.array([x.min(), x.max()])
|
124 |
line_y = slope * line_x + intercept
|
|
|
126 |
|
127 |
r_squared = r_value ** 2
|
128 |
fig.update_layout(
|
129 |
+
title=f'μΆμμΌμμ μ±μ κ΄κ³ (R-squared: {r_squared:.4f})',
|
130 |
annotations=[
|
131 |
dict(
|
132 |
x=0.5,
|
|
|
140 |
)
|
141 |
st.plotly_chart(fig)
|
142 |
|
143 |
+
# λ°λ³ μ±μ λΆν¬
|
144 |
+
if 'λ°' in filtered_data.columns and 'μ±μ ' in filtered_data.columns:
|
145 |
+
st.write("λ°λ³ μ±μ λΆν¬:")
|
146 |
+
fig = px.box(filtered_data, x='λ°', y='μ±μ ', points="all")
|
147 |
+
fig.update_layout(title='λ°λ³ μ±μ λΆν¬')
|
148 |
+
st.plotly_chart(fig)
|
149 |
+
|
150 |
+
# μΆμμΌμ ꡬκ°λ³ μ±μ λΆν¬
|
151 |
+
if 'μΆμμΌμ' in filtered_data.columns and 'μ±μ ' in filtered_data.columns:
|
152 |
+
st.write("μΆμμΌμ ꡬκ°λ³ μ±μ λΆν¬:")
|
153 |
+
filtered_data['μΆμμΌμ_ꡬκ°'] = pd.cut(filtered_data['μΆμμΌμ'], bins=5)
|
154 |
+
fig = px.box(filtered_data, x='μΆμμΌμ_ꡬκ°', y='μ±μ ', color='λ°')
|
155 |
+
fig.update_layout(title='μΆμμΌμ ꡬκ°λ³ μ±μ λΆν¬')
|
156 |
+
st.plotly_chart(fig)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
157 |
|
158 |
def main():
|
159 |
st.title("μΈν°λν°λΈ EDA ν΄ν·")
|