Update app.py
app.py CHANGED
@@ -693,6 +693,7 @@ elif app_mode == "Model Training":
             model = grid_search.best_estimator_
             st.write("Best Parameters:", grid_search.best_params_)
         else:
+            model = RandomForestRegressor(random_state=42) #define if no param_grid
             model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
 
     else:
@@ -703,7 +704,8 @@ elif app_mode == "Model Training":
             model = grid_search.best_estimator_
             st.write("Best Parameters:", grid_search.best_params_)
         else:
-
+            model = RandomForestClassifier(random_state=42) #define if no param_grid
+            model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
     elif model_name == "Gradient Boosting":
         from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
@@ -771,29 +773,8 @@ elif app_mode == "Model Training":
     st.subheader("Model Visualization")
 
     try: #All the plotting code here.
-        if
-            #
-            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy',n_jobs = -1)
-
-            #Then add a plot for the learning curve and use st.pyplot
-            train_mean = np.mean(train_scores, axis=1)
-            train_std = np.std(train_scores, axis=1)
-            valid_mean = np.mean(valid_scores, axis=1)
-            valid_std = np.std(valid_scores, axis=1)
-
-            fig_lc, ax_lc = plt.subplots() #plot the curve in matplotlib
-
-
-            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training Accuracy')
-            ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
-            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation Accuracy')
-            ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')
-
-            ax_lc.set_title('Learning Curves')
-            ax_lc.set_xlabel('Training Set Size')
-            ax_lc.set_ylabel('Accuracy')
-            ax_lc.legend(loc='best')
-            st.pyplot(fig_lc) # Display the figure in Streamlit
+        if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
+            #Feature Importance (Tree-based Models) and model selected was good
             importances = model.feature_importances_ # Assumed tree-based model
             feat_importances = pd.Series(importances, index=X_train.columns)
             feat_importances = feat_importances.nlargest(20)
@@ -805,44 +786,35 @@ elif app_mode == "Model Training":
             ax_feat.set_title('Feature Importances')
             st.pyplot(fig_feat)
 
-        elif problem_type == "Regression" and model_name not in ["Linear Regression","Logistic Regression","SVM","Naive Bayes", "KNN"]: #graph regressions with regressor based models
 
-
+            #Create data that determines the learning and validation curve and what we have to add
+            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
 
-
+
+            #Take and define what this is for from the results that has been generated
            train_mean = np.mean(train_scores, axis=1)
            train_std = np.std(train_scores, axis=1)
            valid_mean = np.mean(valid_scores, axis=1)
            valid_std = np.std(valid_scores, axis=1)
 
-
+
+            #Plot each of the variables that has to be used.
 
 
-
+            fig_lc, ax_lc = plt.subplots()
+            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
            ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
-            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation
+            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
            ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')
 
            ax_lc.set_title('Learning Curves')
            ax_lc.set_xlabel('Training Set Size')
-            ax_lc.set_ylabel('
+            ax_lc.set_ylabel('Score')
            ax_lc.legend(loc='best')
-            st.pyplot(fig_lc)
-            importances = model.feature_importances_ # Assumed tree-based model
-            feat_importances = pd.Series(importances, index=X_train.columns)
-            feat_importances = feat_importances.nlargest(20)
-
-            fig_feat, ax_feat = plt.subplots()
-            feat_importances.plot(kind='barh', ax=ax_feat)
-            ax_feat.set_xlabel('Relative Importance')
-            ax_feat.set_ylabel('Features')
-            ax_feat.set_title('Feature Importances')
-            st.pyplot(fig_feat)
-
+            st.pyplot(fig_lc)
 
    except Exception as e: #Local error
-        st.write(f"
-
+        st.write(f"Visuals are only available for tree based models or if models are selected prior: {e}")
 
 except Exception as e:
     st.error(f"An error occurred: {e}")
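Taken together, the commit makes two changes: fall back to fitting a default estimator when no param_grid is supplied, and choose the learning_curve scoring metric from the problem type before plotting the curves. The snippet below is only a rough, standalone sketch of that pattern, not the app's code: it assumes a synthetic dataset from make_classification, uses a RandomForestClassifier in place of the app's model / X_train_selected / y_train, and saves the figure to disk where the app would call st.pyplot(fig).

# Standalone sketch of the patched flow; names X, y, param_grid are assumptions.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, learning_curve

problem_type = "Classification"            # or "Regression"
X, y = make_classification(n_samples=500, n_features=10, random_state=42)

param_grid = None                          # e.g. {"n_estimators": [100, 300]}
if param_grid:
    # With a grid: tune, then keep the best estimator (as in the app).
    grid_search = GridSearchCV(RandomForestClassifier(random_state=42),
                               param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X, y)
    model = grid_search.best_estimator_
    print("Best Parameters:", grid_search.best_params_)
else:
    # The patch's fallback: define and fit a default model when no grid is given.
    model = RandomForestClassifier(random_state=42)
    model.fit(X, y)

# Metric switch added by the patch: accuracy for classification,
# negated MSE for regression (both are "higher is better").
scoring = 'accuracy' if problem_type == "Classification" else 'neg_mean_squared_error'
train_sizes, train_scores, valid_scores = learning_curve(
    model, X, y, cv=5, scoring=scoring, n_jobs=-1)

train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
valid_mean, valid_std = valid_scores.mean(axis=1), valid_scores.std(axis=1)

label = 'Accuracy' if problem_type == "Classification" else 'Neg MSE'
fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5,
        label=f'Training {label}')
ax.fill_between(train_sizes, train_mean + train_std, train_mean - train_std,
                alpha=0.15, color='blue')
ax.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s',
        markersize=5, label=f'Validation {label}')
ax.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std,
                alpha=0.15, color='green')
ax.set_title('Learning Curves')
ax.set_xlabel('Training Set Size')
ax.set_ylabel('Score')
ax.legend(loc='best')
fig.savefig('learning_curves.png')         # the app renders via st.pyplot(fig)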