Update app.py

app.py CHANGED
@@ -603,7 +603,7 @@ elif app_mode == "Model Training":
     # Model Selection
     model_name = st.selectbox("Select Model", [
         "Linear Regression", "Logistic Regression", "Decision Tree",
-        "Random Forest", "Gradient Boosting", "SVM"
+        "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN"  # Expanded models
     ], help="Choose a model.")
 
     feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
@@ -697,34 +697,107 @@ elif app_mode == "Model Training":
             st.write("Best Parameters:", grid_search.best_params_)
         else:
             model.fit(X_train_selected, y_train)  # fit without grid search if param_grid is not defined
-
     elif model_name == "Gradient Boosting":
         from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier  # moved import here to avoid bloat
         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
     elif model_name == "SVM":
         model = SVR() if problem_type == "Regression" else SVC()
+    elif model_name == "Naive Bayes":
+        from sklearn.naive_bayes import GaussianNB
+        model = GaussianNB()
+    elif model_name == "KNN":
+        from sklearn.neighbors import KNeighborsClassifier
+        model = KNeighborsClassifier()
+
+    # Cost-Sensitive Classification
+    cost_matrix = None
+    if problem_type == "Classification":
+        cost_sensitive = st.checkbox("Enable Cost-Sensitive Classification")  # new
+
+        if cost_sensitive:
+            # Get class labels
+            classes = np.unique(y_train)
+
+            # Create a matrix, with the default cost being 1
+            cost_matrix = np.ones((len(classes), len(classes)))
+
+            # Cost of correct predictions is 0
+            np.fill_diagonal(cost_matrix, 0)
+
+            # Allow for individual weight specification
+            st.write("Define misclassification costs:")
+
+            for i in range(len(classes)):
+                for j in range(len(classes)):
+                    if i != j:
+                        cost_matrix[i, j] = st.number_input(f"Cost of classifying {classes[i]} as {classes[j]}", value=1.0, min_value=0.0)
+
 
-    #
-
-
-
+    # Threshold adjustment options; only shown for logistic regression
+
+    if model_name == "Logistic Regression" and problem_type == "Classification":
+        threshold = st.slider("Select Threshold", 0.0, 1.0, 0.5, 0.01, help="Adjust the classification threshold")
+        model = LogisticRegression(max_iter=1000)
+
+        model.fit(X_train_selected, y_train)  # Fit model
+
+        # Adjust predictions according to the threshold and store them in new variables
+        y_pred_prob = model.predict_proba(X_test_selected)[:, 1]
+        y_pred = (y_pred_prob > threshold).astype(int)
+
+
+    else:
+        # Cross-validation
+        cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error')  # example, adjust cv
+        st.write(f"Cross-validation scores: {cv_scores}")
+        st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
+
+        model.fit(X_train_selected, y_train)
+
+        # Store model and preprocessor
+        st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
+        st.session_state.preprocessor = preprocessor
+
+        y_pred = model.predict(X_test_selected)
 
-    model.fit(X_train_selected, y_train)
 
-    # Store model and preprocessor
-    st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
-    st.session_state.preprocessor = preprocessor
 
     # Model Evaluation
-
+
     if problem_type == "Regression":
         mse = mean_squared_error(y_test, y_pred)
         r2 = r2_score(y_test, y_pred)
         st.write(f"Mean Squared Error: {mse:.4f}")
         st.write(f"R-squared: {r2:.4f}")
     else:
+        from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report  # Import here to avoid library bloat
+
+        # Weighted averaging for multiclass metrics
+        average_method = "weighted"  # changed from None
+
         accuracy = accuracy_score(y_test, y_pred)
+        precision = precision_score(y_test, y_pred, average=average_method, zero_division=0)
+        recall = recall_score(y_test, y_pred, average=average_method, zero_division=0)
+        f1 = f1_score(y_test, y_pred, average=average_method, zero_division=0)
         st.write(f"Accuracy: {accuracy:.4f}")
+        st.write(f"Precision: {precision:.4f}")
+        st.write(f"Recall: {recall:.4f}")
+        st.write(f"F1 Score: {f1:.4f}")
+        st.write("Classification Report:")
+        st.text(classification_report(y_test, y_pred, zero_division=0))
+
+
+        # Confusion Matrix
+
+        conf_matrix = confusion_matrix(y_test, y_pred)
+
+        # Heatmap
+        fig_conf, ax_conf = plt.subplots()
+        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
+        ax_conf.set_xlabel('Predicted Labels')
+        ax_conf.set_ylabel('True Labels')
+        ax_conf.set_title('Confusion Matrix')
+        st.pyplot(fig_conf)
 
     st.success("Model trained successfully!")
 
@@ -751,7 +824,7 @@ elif app_mode == "Model Training":
         st.error(f"Error loading model: {e}")
 
     # Model Evaluation Section
-    if 'X_test' in locals() and st.session_state.model is not None:
+    if 'X_test' in locals() and st.session_state.model is not None and problem_type == "Regression":
        y_pred = st.session_state.model.predict(X_test)
 
        if problem_type == "Regression":
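The cost-sensitive block in the hunk above collects a cost_matrix from the user, but the training path that follows never applies it. Below is a minimal sketch, on toy data, of one common heuristic for wiring such costs into training: weight each sample by the total cost of misclassifying its true class, then pass that to fit(sample_weight=...). This assumes an estimator whose fit accepts sample_weight (LogisticRegression does; KNeighborsClassifier, for example, does not), and the cost values are invented for illustration.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Toy data standing in for the app's X_train_selected / y_train.
X, y = make_classification(n_samples=300, n_features=10, n_informative=6,
                           n_classes=3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

classes = np.unique(y_train)

# Same shape the app builds: cost_matrix[i, j] is the cost of
# predicting class j when the true class is i.
cost_matrix = np.ones((len(classes), len(classes)))
np.fill_diagonal(cost_matrix, 0)
cost_matrix[2, :2] = 5.0  # invented: missing class 2 is five times as costly

# Heuristic: weight each sample by the total cost of getting its
# true class wrong, so expensive classes dominate the fit.
class_weight = cost_matrix.sum(axis=1)
sample_weight = class_weight[np.searchsorted(classes, y_train)]

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train, sample_weight=sample_weight)
print(f"Test accuracy with cost-derived weights: {model.score(X_test, y_test):.3f}")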
|
736 |
+
#Threshold adjustment options, only shows up for log regression
|
737 |
+
|
738 |
+
if model_name == "Logistic Regression" and problem_type == "Classification":
|
739 |
+
threshold = st.slider("Select Threshold", 0.0, 1.0, 0.5, 0.01, help="Adjust the classification threshold")
|
740 |
+
model = LogisticRegression(max_iter=1000)
|
741 |
+
|
742 |
+
model.fit(X_train_selected, y_train) #Fit model
|
743 |
+
|
744 |
+
#Adjust predictions according to threshold and make new variables
|
745 |
+
y_pred_prob = model.predict_proba(X_test_selected)[:,1]
|
746 |
+
y_pred = (y_pred_prob > threshold).astype(int)
|
747 |
+
|
748 |
+
|
749 |
+
else:
|
750 |
+
# Cross-validation
|
751 |
+
cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error') #example, adjust cv
|
752 |
+
st.write(f"Cross-validation scores: {cv_scores}")
|
753 |
+
st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
|
754 |
+
|
755 |
+
model.fit(X_train_selected, y_train)
|
756 |
+
|
757 |
+
# Store model and preprocessor
|
758 |
+
st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
|
759 |
+
st.session_state.preprocessor = preprocessor
|
760 |
+
|
761 |
+
y_pred = model.predict(X_test_selected)
|
762 |
|
|
|
763 |
|
|
|
|
|
|
|
764 |
|
765 |
# Model Evaluation
|
766 |
+
|
767 |
if problem_type == "Regression":
|
768 |
mse = mean_squared_error(y_test, y_pred)
|
769 |
r2 = r2_score(y_test, y_pred)
|
770 |
st.write(f"Mean Squared Error: {mse:.4f}")
|
771 |
st.write(f"R-squared: {r2:.4f}")
|
772 |
else:
|
773 |
+
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
|
774 |
+
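One subtlety in the cross-validation lines above: scikit-learn scorers follow a higher-is-better convention, so for regression 'neg_mean_squared_error' returns negated errors, and the "Mean cross-validation score" the app prints will read as a negative number. A minimal sketch, on toy regression data, of flipping the sign before reporting:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)

# 'neg_mean_squared_error' is negated MSE, so every score comes back <= 0.
scores = cross_val_score(LinearRegression(), X, y, cv=5,
                         scoring="neg_mean_squared_error")
mse_per_fold = -scores  # negate to recover a plain, positive MSE
print(f"MSE per fold: {np.round(mse_per_fold, 2)}")
print(f"Mean MSE: {mse_per_fold.mean():.2f}")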
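The evaluation hunk sets average='weighted' for the multiclass precision, recall, and F1. A small example (invented labels, with one rare class, which is exactly where the averaging choice matters) contrasting it with 'macro' averaging and showing the per-class breakdown the app prints via st.text:

from sklearn.metrics import classification_report, f1_score, precision_score, recall_score

# Invented multiclass labels; class 2 is rare.
y_true = [0, 0, 0, 1, 1, 1, 1, 2, 2, 0]
y_pred = [0, 0, 1, 1, 1, 1, 0, 2, 0, 0]

# 'weighted' averages per-class scores by class support, so frequent classes
# dominate; 'macro' gives every class equal weight regardless of size.
for avg in ("weighted", "macro"):
    p = precision_score(y_true, y_pred, average=avg, zero_division=0)
    r = recall_score(y_true, y_pred, average=avg, zero_division=0)
    f = f1_score(y_true, y_pred, average=avg, zero_division=0)
    print(f"{avg:>8}: precision={p:.3f}  recall={r:.3f}  f1={f:.3f}")

# Per-class report, as shown in the app:
print(classification_report(y_true, y_pred, zero_division=0))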