Update app.py
app.py CHANGED
@@ -693,6 +693,7 @@ elif app_mode == "Model Training":
             model = grid_search.best_estimator_
             st.write("Best Parameters:", grid_search.best_params_)
         else:
+            model = RandomForestRegressor(random_state=42) #define if no param_grid
             model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
 
     else:
@@ -703,7 +704,8 @@ elif app_mode == "Model Training":
             model = grid_search.best_estimator_
             st.write("Best Parameters:", grid_search.best_params_)
         else:
-
+            model = RandomForestClassifier(random_state=42) #define if no param_grid
+            model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
     elif model_name == "Gradient Boosting":
         from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
@@ -771,29 +773,8 @@ elif app_mode == "Model Training":
     st.subheader("Model Visualization")
 
     try: #All the plotting code here.
-        if
-            #
-            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy',n_jobs = -1)
-
-            #Then add a plot for the learning curve and use st.pyplot
-            train_mean = np.mean(train_scores, axis=1)
-            train_std = np.std(train_scores, axis=1)
-            valid_mean = np.mean(valid_scores, axis=1)
-            valid_std = np.std(valid_scores, axis=1)
-
-            fig_lc, ax_lc = plt.subplots() #plot the curve in matplotlib
-
-
-            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training Accuracy')
-            ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
-            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation Accuracy')
-            ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')
-
-            ax_lc.set_title('Learning Curves')
-            ax_lc.set_xlabel('Training Set Size')
-            ax_lc.set_ylabel('Accuracy')
-            ax_lc.legend(loc='best')
-            st.pyplot(fig_lc) # Display the figure in Streamlit
+        if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
+            #Feature Importance (Tree-based Models) and model selected was good
             importances = model.feature_importances_ # Assumed tree-based model
             feat_importances = pd.Series(importances, index=X_train.columns)
             feat_importances = feat_importances.nlargest(20)
@@ -805,44 +786,35 @@ elif app_mode == "Model Training":
             ax_feat.set_title('Feature Importances')
             st.pyplot(fig_feat)
 
-        elif problem_type == "Regression" and model_name not in ["Linear Regression","Logistic Regression","SVM","Naive Bayes", "KNN"]: #graph regressions with regressor based models
 
-
+            #Create data that determines the learning and validation curve and what we have to add
+            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
 
-
+
+            #Take and define what this is for from the results that has been generated
            train_mean = np.mean(train_scores, axis=1)
            train_std = np.std(train_scores, axis=1)
            valid_mean = np.mean(valid_scores, axis=1)
            valid_std = np.std(valid_scores, axis=1)
 
-
+
+            #Plot each of the variables that has to be used.
 
 
-
+            fig_lc, ax_lc = plt.subplots()
+            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
            ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
-            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation
+            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
            ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')
 
            ax_lc.set_title('Learning Curves')
            ax_lc.set_xlabel('Training Set Size')
-            ax_lc.set_ylabel('
+            ax_lc.set_ylabel('Score')
            ax_lc.legend(loc='best')
-            st.pyplot(fig_lc)
-            importances = model.feature_importances_ # Assumed tree-based model
-            feat_importances = pd.Series(importances, index=X_train.columns)
-            feat_importances = feat_importances.nlargest(20)
-
-            fig_feat, ax_feat = plt.subplots()
-            feat_importances.plot(kind='barh', ax=ax_feat)
-            ax_feat.set_xlabel('Relative Importance')
-            ax_feat.set_ylabel('Features')
-            ax_feat.set_title('Feature Importances')
-            st.pyplot(fig_feat)
-
+            st.pyplot(fig_lc)
 
    except Exception as e: #Local error
-        st.write(f"
-
+        st.write(f"Visuals are only available for tree based models or if models are selected prior: {e}")
 
 except Exception as e:
     st.error(f"An error occurred: {e}")
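Taken together, the commit makes two changes: fall back to fitting a default estimator when no param_grid is supplied, and choose the learning_curve scoring metric from the problem type before plotting the curves. The snippet below is only a rough, standalone sketch of that pattern, not the app's code: it assumes a synthetic dataset from make_classification, uses a RandomForestClassifier in place of the app's model / X_train_selected / y_train, and saves the figure to disk where the app would call st.pyplot(fig).

# Standalone sketch of the patched flow; names X, y, param_grid are assumptions.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, learning_curve

problem_type = "Classification"            # or "Regression"
X, y = make_classification(n_samples=500, n_features=10, random_state=42)

param_grid = None                          # e.g. {"n_estimators": [100, 300]}
if param_grid:
    # With a grid: tune, then keep the best estimator (as in the app).
    grid_search = GridSearchCV(RandomForestClassifier(random_state=42),
                               param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X, y)
    model = grid_search.best_estimator_
    print("Best Parameters:", grid_search.best_params_)
else:
    # The patch's fallback: define and fit a default model when no grid is given.
    model = RandomForestClassifier(random_state=42)
    model.fit(X, y)

# Metric switch added by the patch: accuracy for classification,
# negated MSE for regression (both are "higher is better").
scoring = 'accuracy' if problem_type == "Classification" else 'neg_mean_squared_error'
train_sizes, train_scores, valid_scores = learning_curve(
    model, X, y, cv=5, scoring=scoring, n_jobs=-1)

train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
valid_mean, valid_std = valid_scores.mean(axis=1), valid_scores.std(axis=1)

label = 'Accuracy' if problem_type == "Classification" else 'Neg MSE'
fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5,
        label=f'Training {label}')
ax.fill_between(train_sizes, train_mean + train_std, train_mean - train_std,
                alpha=0.15, color='blue')
ax.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s',
        markersize=5, label=f'Validation {label}')
ax.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std,
                alpha=0.15, color='green')
ax.set_title('Learning Curves')
ax.set_xlabel('Training Set Size')
ax.set_ylabel('Score')
ax.legend(loc='best')
fig.savefig('learning_curves.png')         # the app renders via st.pyplot(fig)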