Update app.py

app.py CHANGED
@@ -603,25 +603,43 @@ elif app_mode == "Model Training":
     # Feature Selection
     feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose features for training.")
 
-    # Model Selection
-
-        "Linear Regression", "
-
-
+    # Model Selection - Dynamic based on Problem Type
+    if problem_type == "Regression":
+        model_options = ["Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM"]
+    else: # Classification
+        model_options = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN"]
+
+    model_name = st.selectbox("Select Model", model_options, help="Choose a model.")
 
     feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
 
-
-
-
+    # Hyperparameter Tuning - Dynamic based on Model Selection
+    param_grid = {} # Initialize to empty dictionary
+    if model_name == "Random Forest":
+        st.subheader("Random Forest Hyperparameters")
         param_grid = {
-            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="
+            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="n_estimators"),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key = "n_estimators2")+1))),
             'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "max_depth2")+1))),
             'min_samples_split': list(range(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key="min_samples_split1"),(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key = "min_samples_split2")+1))), #New hyperparameter
             'min_samples_leaf': list(range(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key="min_samples_leaf1"),(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key = "min_samples_leaf2")+1))), #New hyperparameter
         }
 
-
+    elif model_name == "Gradient Boosting":
+        st.subheader("Gradient Boosting Hyperparameters")
+        param_grid = {
+            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="gb_n_estimators1"),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key = "gb_n_estimators2")+1))),
+            'learning_rate': [st.slider("Learning Rate", 0.01, 1.0, 0.1, step=0.01, help="Learning rate", key = 'gb_learning_rate')], # Example, add more
+            'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="gb_max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "gb_max_depth2")+1))),
+        }
+
+    elif model_name == "Decision Tree":
+        st.subheader("Decision Tree Hyperparameters")
+        param_grid = {
+            'criterion': st.selectbox("Criterion", ["gini", "entropy"], help="Splitting criterion"),
+            'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="dt_max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "dt_max_depth2")+1))),
+        }
+
+    # Train-Test Split
     test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
 
     if st.button("Train Model"):

@@ -673,33 +691,27 @@ elif app_mode == "Model Training":
         # Model Training and Hyperparameter Tuning
         if model_name == "Linear Regression":
             model = LinearRegression()
-            model.fit(X_train_selected, y_train)
-
         elif model_name == "Logistic Regression":
             model = LogisticRegression(max_iter=1000)
-            model.fit(X_train_selected, y_train)
         elif model_name == "Decision Tree":
             if problem_type == "Regression":
                 model = DecisionTreeRegressor()
-                model.fit(X_train_selected, y_train)
             else:
                 model = DecisionTreeClassifier()
-                model.fit(X_train_selected, y_train)
         elif model_name == "Random Forest":
             if problem_type == "Regression":
                 model = RandomForestRegressor(random_state=42)
-                if 'param_grid' in locals():
+                if 'param_grid' in locals() and param_grid: #added param_grid not empty condition
                     grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
                     grid_search.fit(X_train_selected, y_train)
                     model = grid_search.best_estimator_
                     st.write("Best Parameters:", grid_search.best_params_)
                 else:
-                    model = RandomForestRegressor(random_state=42) #define if no param_grid
                     model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
 
             else:
                 model = RandomForestClassifier(random_state=42)
-                if 'param_grid' in locals():
+                if 'param_grid' in locals()and param_grid: #added param_grid not empty condition
                     grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
                     grid_search.fit(X_train_selected, y_train)
                     model = grid_search.best_estimator_

@@ -760,7 +772,7 @@ elif app_mode == "Model Training":
         #Confusion Matrix
 
         conf_matrix = confusion_matrix(y_test, y_pred)
-
+
         # Assuming conf_matrix is your confusion matrix
         fig_conf, ax_conf = plt.subplots()
         sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf) # Corrected named argument

@@ -769,14 +781,14 @@ elif app_mode == "Model Training":
         ax_conf.set_title('Confusion Matrix')
         st.pyplot(fig_conf)
 
+
         #Added section for model visualization
         st.subheader("Model Visualization")
         #Use conditional to make sure that everything only executes when the data set is trained and not outside of it.
         if st.session_state.model is not None: #Make sure that everything only executes when data set is trained and not outside of it.
             try: #All the plotting code here.
                 if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
-
-                #Make sure you use this inside of a conditional for classification, model, and tree based model.
+                    #Make sure you use this inside of a conditional for classification, model, and tree based model.
 
                     #Feature Importance (Tree-based Models)
 

@@ -794,7 +806,6 @@ elif app_mode == "Model Training":
                     #Create data that determines the learning and validation curve and what we have to add
                     train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
 
-
                     #Take and define what this is for from the results that has been generated
                     train_mean = np.mean(train_scores, axis=1)
                     train_std = np.std(train_scores, axis=1)

@@ -815,13 +826,9 @@ elif app_mode == "Model Training":
                     ax_lc.legend(loc='best')
                     st.pyplot(fig_lc)
 
-
             except Exception as e: #Local error
                 st.write(f"Visuals are only available for tree based models or if models are selected prior: {e}") #Write only if error
 
-
-
-
         except Exception as e:
             st.error(f"An error occurred: {e}")
 

@@ -862,7 +869,6 @@ elif app_mode == "Model Training":
             st.write(f"Accuracy: {accuracy:.4f}")
         except Exception as e: #local error
             st.error(f"An error occurred during model evaluation: {e}")
-
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")
 
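A few notes on the change follow.

The hyperparameter grids are built from paired sliders (for example key="max_depth1" and key="max_depth2") whose two values feed range(low, high + 1); if the second slider is dragged below the first, range() is empty and GridSearchCV receives an empty candidate list. st.slider also accepts a tuple default, which renders a single range slider returning a (low, high) pair. A minimal sketch of that alternative, keeping the labels and bounds from the diff (the range_param helper is hypothetical, not part of the app):

import streamlit as st

def range_param(label, lo, hi, default, key, help_text=None):
    # A tuple default makes st.slider render one range slider and
    # return a (low, high) pair instead of a single value.
    low, high = st.slider(label, lo, hi, default, key=key, help=help_text)
    return list(range(low, high + 1))

# Possible usage in the Random Forest branch:
# param_grid = {
#     'n_estimators': range_param("Number of Estimators", 10, 200, (50, 100), key="n_estimators"),
#     'max_depth': range_param("Max Depth", 3, 20, (5, 10), key="max_depth"),
# }

This uses one widget per parameter, and the empty-range case cannot occur because the widget keeps low <= high.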
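In the new Decision Tree branch, GridSearchCV expects each param_grid value to be a list of candidates, but st.selectbox returns a single string, so 'criterion': st.selectbox(...) would make the search iterate over the characters of "gini" and fail. Wrapping the selection in a list should address this:

# GridSearchCV tries each element of the value, so the scalar
# selectbox result needs to be a one-element list.
'criterion': [st.selectbox("Criterion", ["gini", "entropy"], help="Splitting criterion")],

Also, "gini" and "entropy" are classifier criteria; DecisionTreeRegressor expects values such as "squared_error", so the regression path would need different options.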
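On the guard change in the Random Forest branches: because the new code initialises param_grid = {} before the model-specific blocks, 'param_grid' in locals() is now always true and only the added truthiness test does real work. A minimal sketch of the regression path under that reading (X_train_selected, y_train, param_grid, and st are assumed from the surrounding app):

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

model = RandomForestRegressor(random_state=42)
if param_grid:  # an empty dict is falsy, so this alone covers "no grid built"
    grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error')
    grid_search.fit(X_train_selected, y_train)
    model = grid_search.best_estimator_  # best model, refit on the training split
    st.write("Best Parameters:", grid_search.best_params_)
else:
    model.fit(X_train_selected, y_train)  # plain fit when no grid was defined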
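Finally, on the learning-curve section: with scoring='neg_mean_squared_error', learning_curve returns negated errors, so in the regression case the plotted means are negative. If positive error curves are preferred, the sign can be flipped before plotting; a sketch along those lines (variable names follow the diff, and the sign flip is a suggestion rather than part of the commit):

import numpy as np
from sklearn.model_selection import learning_curve

train_sizes, train_scores, valid_scores = learning_curve(
    model, X_train_selected, y_train, cv=5,
    scoring='neg_mean_squared_error', n_jobs=-1)

# Negate so the y-axis shows MSE decreasing as training size grows.
train_mean = -np.mean(train_scores, axis=1)
valid_mean = -np.mean(valid_scores, axis=1)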