Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -213,16 +213,68 @@ if show_regression and selected_feature in df.columns and selected_target == 'Me
|
|
213 |
|
214 |
st.pyplot(fig)
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
# Prediction
|
217 |
st.subheader('Predict Median House Value')
|
218 |
|
219 |
-
# Input values for prediction
|
220 |
input_values = {}
|
221 |
-
for feature in
|
222 |
input_values[feature] = st.number_input(f'Enter {feature}', value=float(df[feature].mean()))
|
223 |
|
224 |
if st.button('Predict'):
|
225 |
input_data = np.array([list(input_values.values())])
|
226 |
-
prediction =
|
227 |
st.write(f'Predicted Median House Value: {prediction[0]}')
|
228 |
|
|
|
213 |
|
214 |
st.pyplot(fig)
|
215 |
|
216 |
+
# Simple Linear Regression
|
217 |
+
X_single = df[['MedInc']]
|
218 |
+
y = df['MedHouseVal']
|
219 |
+
|
220 |
+
X_train_single, X_test_single, y_train_single, y_test_single = train_test_split(X_single, y, test_size=0.2, random_state=42)
|
221 |
+
|
222 |
+
model_single = LinearRegression()
|
223 |
+
model_single.fit(X_train_single, y_train_single)
|
224 |
+
|
225 |
+
y_pred_single = model_single.predict(X_test_single)
|
226 |
+
|
227 |
+
r2_single = r2_score(y_test_single, y_pred_single)
|
228 |
+
|
229 |
+
# Plot the regression line for simple linear regression
|
230 |
+
fig, ax = plt.subplots()
|
231 |
+
ax.scatter(X_test_single, y_test_single, color='blue', alpha=0.3, label='Actual')
|
232 |
+
ax.plot(X_test_single, y_pred_single, color='red', linewidth=2, label='Predicted')
|
233 |
+
ax.set_title('Simple Linear Regression: MedInc vs MedHouseVal')
|
234 |
+
ax.set_xlabel('MedInc')
|
235 |
+
ax.set_ylabel('MedHouseVal')
|
236 |
+
ax.legend()
|
237 |
+
# Render the simple-regression figure, then release it: pyplot keeps every
# figure it creates alive until closed, and this script reruns on each interaction.
st.pyplot(fig)
plt.close(fig)
|
238 |
+
|
239 |
+
# Multilinear Regression
|
240 |
+
X_multi = df[['MedInc', 'AveRooms', 'HouseAge', 'AveOccup']]
|
241 |
+
y = df['MedHouseVal']
|
242 |
+
|
243 |
+
X_train_multi, X_test_multi, y_train_multi, y_test_multi = train_test_split(X_multi, y, test_size=0.2, random_state=42)
|
244 |
+
|
245 |
+
model_multi = LinearRegression()
|
246 |
+
model_multi.fit(X_train_multi, y_train_multi)
|
247 |
+
|
248 |
+
y_pred_multi = model_multi.predict(X_test_multi)
|
249 |
+
|
250 |
+
r2_multi = r2_score(y_test_multi, y_pred_multi)
|
251 |
+
|
252 |
+
# Add checkbox for multilinear regression plot
|
253 |
+
show_multilinear_plot = st.checkbox('Show Multilinear Regression Plot')
|
254 |
+
|
255 |
+
if show_multilinear_plot:
|
256 |
+
fig, ax = plt.subplots()
|
257 |
+
ax.scatter(y_test_multi, y_pred_multi, color='blue', alpha=0.3)
|
258 |
+
# Dashed y = x reference line: points lying on it are perfect predictions.
# The colour is given only by the keyword; a 'k--' format string would also
# name black and make Matplotlib warn about a redundantly defined colour.
ax.plot([y_test_multi.min(), y_test_multi.max()], [y_test_multi.min(), y_test_multi.max()], '--', lw=2, color='green')
|
259 |
+
ax.set_title('Multilinear Regression: Actual vs. Predicted MedHouseVal')
|
260 |
+
ax.set_xlabel('Actual MedHouseVal')
|
261 |
+
ax.set_ylabel('Predicted MedHouseVal')
|
262 |
+
# Render the actual-vs-predicted figure, then release it: pyplot keeps every
# figure it creates alive until closed, and this script reruns on each interaction.
st.pyplot(fig)
plt.close(fig)
|
263 |
+
|
264 |
+
# Compare R-squared values
|
265 |
+
st.subheader('R-squared Comparison')
|
266 |
+
st.write(f"Simple Linear Regression R-squared: {r2_single:.4f}")
|
267 |
+
st.write(f"Multilinear Regression R-squared: {r2_multi:.4f}")
|
268 |
+
|
269 |
# Prediction
|
270 |
st.subheader('Predict Median House Value')
|
271 |
|
|
|
272 |
input_values = {}
|
273 |
+
for feature in X_multi.columns:
|
274 |
input_values[feature] = st.number_input(f'Enter {feature}', value=float(df[feature].mean()))
|
275 |
|
276 |
if st.button('Predict'):
|
277 |
# Build a single-row 2-D array; the values keep the X_multi.columns order in which input_values was filled.
# NOTE(review): model_multi is fitted on DataFrame columns, so predicting on a bare array
# presumably triggers scikit-learn's "X does not have valid feature names" warning —
# confirm, and consider passing a one-row DataFrame with X_multi.columns instead.
input_data = np.array([list(input_values.values())])
|
278 |
+
prediction = model_multi.predict(input_data)
|
279 |
st.write(f'Predicted Median House Value: {prediction[0]}')
|
280 |
|