Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -44,59 +44,56 @@ def main():
|
|
44 |
predictors = st.multiselect(
|
45 |
'Select predictor variables for regression:',
|
46 |
options=predictor_options,
|
47 |
-
default=
|
48 |
)
|
49 |
|
50 |
-
if not predictors:
|
51 |
-
st.error("Please select at least one predictor variable.")
|
52 |
-
return
|
53 |
-
|
54 |
-
st.write("## Scatter Plot")
|
55 |
-
if len(predictors) == 1:
|
56 |
-
fig, ax = plt.subplots()
|
57 |
-
ax.scatter(df[predictors[0]], df[target])
|
58 |
-
ax.set_xlabel(predictors[0])
|
59 |
-
ax.set_ylabel(target)
|
60 |
-
ax.set_title(f'Relationship between {predictors[0]} and {target}')
|
61 |
-
st.pyplot(fig)
|
62 |
-
else:
|
63 |
-
st.write("Scatter plot is only available for a single predictor.")
|
64 |
-
|
65 |
# Splitting data for regression
|
66 |
X = df[predictors]
|
67 |
y = df[target]
|
68 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
69 |
|
70 |
-
# Perform
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
st.
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
ax.
|
99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
st.pyplot(fig)
|
101 |
|
102 |
if __name__ == "__main__":
|
@@ -105,3 +102,4 @@ if __name__ == "__main__":
|
|
105 |
|
106 |
|
107 |
|
|
|
|
44 |
predictors = st.multiselect(
|
45 |
'Select predictor variables for regression:',
|
46 |
options=predictor_options,
|
47 |
+
default=predictor_options # default to all predictors for MLR
|
48 |
)
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# Splitting data for regression
|
51 |
X = df[predictors]
|
52 |
y = df[target]
|
53 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
54 |
|
55 |
+
# Perform multilinear regression
|
56 |
+
mlr_model = LinearRegression()
|
57 |
+
mlr_model.fit(X_train, y_train)
|
58 |
+
mlr_y_pred = mlr_model.predict(X_test)
|
59 |
+
mlr_rmse = np.sqrt(mean_squared_error(y_test, mlr_y_pred))
|
60 |
+
mlr_r2 = r2_score(y_test, mlr_y_pred)
|
61 |
+
|
62 |
+
# Perform simple linear regression using only one predictor if possible
|
63 |
+
if 'AveRooms' in predictors:
|
64 |
+
slr_model = LinearRegression()
|
65 |
+
slr_X_train = X_train[['AveRooms']]
|
66 |
+
slr_X_test = X_test[['AveRooms']]
|
67 |
+
slr_model.fit(slr_X_train, y_train)
|
68 |
+
slr_y_pred = slr_model.predict(slr_X_test)
|
69 |
+
slr_rmse = np.sqrt(mean_squared_error(y_test, slr_y_pred))
|
70 |
+
slr_r2 = r2_score(y_test, slr_y_pred)
|
71 |
+
|
72 |
+
# Display RMSE and R-squared comparisons
|
73 |
+
st.write("## Regression Performance Comparison")
|
74 |
+
st.write(f"### Multilinear Regression (using all selected predictors)")
|
75 |
+
st.write(f'RMSE: {mlr_rmse}')
|
76 |
+
st.write(f'R-squared: {mlr_r2}')
|
77 |
+
|
78 |
+
st.write(f"### Simple Linear Regression (using 'AveRooms')")
|
79 |
+
st.write(f'RMSE: {slr_rmse}')
|
80 |
+
st.write(f'R-squared: {slr_r2}')
|
81 |
+
|
82 |
+
# Plotting both regressions
|
83 |
+
fig, ax = plt.subplots(1, 2, figsize=(15, 6))
|
84 |
+
|
85 |
+
ax[0].scatter(y_test, mlr_y_pred, color='blue')
|
86 |
+
ax[0].plot(y_test, y_test, color='red')
|
87 |
+
ax[0].set_title('Multilinear Regression: Actual vs Predicted')
|
88 |
+
ax[0].set_xlabel('Actual Values')
|
89 |
+
ax[0].set_ylabel('Predicted Values')
|
90 |
+
|
91 |
+
ax[1].scatter(y_test, slr_y_pred, color='green')
|
92 |
+
ax[1].plot(y_test, y_test, color='red')
|
93 |
+
ax[1].set_title("Simple Linear Regression ('AveRooms'): Actual vs Predicted")
|
94 |
+
ax[1].set_xlabel('Actual Values')
|
95 |
+
ax[1].set_ylabel('Predicted Values')
|
96 |
+
|
97 |
st.pyplot(fig)
|
98 |
|
99 |
if __name__ == "__main__":
|
|
|
102 |
|
103 |
|
104 |
|
105 |
+
|