CosmickVisions committed on
Commit 81c471a · verified · 1 Parent(s): cff9e1f

Update app.py

Files changed (1)
  1. app.py +17 -45
app.py CHANGED
@@ -693,6 +693,7 @@ elif app_mode == "Model Training":
         model = grid_search.best_estimator_
         st.write("Best Parameters:", grid_search.best_params_)
     else:
+        model = RandomForestRegressor(random_state=42) # define if no param_grid
         model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.

 else:
@@ -703,7 +704,8 @@ elif app_mode == "Model Training":
         model = grid_search.best_estimator_
         st.write("Best Parameters:", grid_search.best_params_)
     else:
-        model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
+        model = RandomForestClassifier(random_state=42) # define if no param_grid
+        model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
 elif model_name == "Gradient Boosting":
     from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier # moved import here to avoid bloat
     model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
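Both hunks above fix the same latent bug: when no `param_grid` was supplied, the `else:` branch called `model.fit(...)` on a name that had never been assigned, raising a `NameError`. A minimal runnable sketch of the corrected pattern, with stand-in data in place of the app's `param_grid`, `X_train_selected`, and `y_train`:

```python
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Stand-in data; in the app these come from the user's uploaded dataset.
X_train_selected, y_train = make_classification(n_samples=200, random_state=42)
param_grid = {}  # empty dict stands in for "no param_grid defined"

if param_grid:
    grid_search = GridSearchCV(RandomForestClassifier(random_state=42), param_grid, cv=5)
    grid_search.fit(X_train_selected, y_train)
    model = grid_search.best_estimator_  # refit on the full training set by default
else:
    # The fix: assign a default estimator before fitting; previously `model`
    # was undefined here, so model.fit(...) raised a NameError.
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train_selected, y_train)

print(model.score(X_train_selected, y_train))
```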
@@ -771,29 +773,8 @@ elif app_mode == "Model Training":
     st.subheader("Model Visualization")

     try: # All the plotting code here.
-        if problem_type == "Classification" and model_name not in ["Linear Regression", "Logistic Regression", "SVM", "Naive Bayes", "KNN"]:
-            # Added code that calculates the learning curves
-            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy', n_jobs=-1)
-
-            # Then add a plot for the learning curve and use st.pyplot
-            train_mean = np.mean(train_scores, axis=1)
-            train_std = np.std(train_scores, axis=1)
-            valid_mean = np.mean(valid_scores, axis=1)
-            valid_std = np.std(valid_scores, axis=1)
-
-            fig_lc, ax_lc = plt.subplots() # plot the curve in matplotlib
-            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training Accuracy')
-            ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
-            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation Accuracy')
-            ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')
-
-            ax_lc.set_title('Learning Curves')
-            ax_lc.set_xlabel('Training Set Size')
-            ax_lc.set_ylabel('Accuracy')
-            ax_lc.legend(loc='best')
-            st.pyplot(fig_lc) # Display the figure in Streamlit
+        if model_name in ["Random Forest", "Gradient Boosting"]: # models that expose feature importances
+            # Feature importance plot (tree-based models only)
             importances = model.feature_importances_ # Assumed tree-based model
             feat_importances = pd.Series(importances, index=X_train.columns)
             feat_importances = feat_importances.nlargest(20)
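This hunk gates the plots behind a model-name whitelist. An alternative (not what the commit does, just a sketch of the same idea) is to test for the `feature_importances_` attribute directly, which covers any tree ensemble without maintaining a list; stand-in data and model make it self-contained:

```python
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Stand-in data and model so the sketch runs on its own.
X, y = make_classification(n_samples=200, n_features=10, random_state=42)
X = pd.DataFrame(X, columns=[f"f{i}" for i in range(10)])
model = RandomForestClassifier(random_state=42).fit(X, y)

# Attribute check instead of a model-name whitelist: any estimator that
# exposes feature_importances_ (tree ensembles) takes this path.
if hasattr(model, "feature_importances_"):
    feat_importances = pd.Series(model.feature_importances_, index=X.columns).nlargest(20)
    fig_feat, ax_feat = plt.subplots()
    feat_importances.plot(kind="barh", ax=ax_feat)
    ax_feat.set_xlabel("Relative Importance")
    ax_feat.set_title("Feature Importances")
    # in the app this would be rendered with st.pyplot(fig_feat)
```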
@@ -805,44 +786,35 @@ elif app_mode == "Model Training":
             ax_feat.set_title('Feature Importances')
             st.pyplot(fig_feat)

-        elif problem_type == "Regression" and model_name not in ["Linear Regression", "Logistic Regression", "SVM", "Naive Bayes", "KNN"]: # graph regressions with regressor-based models
-            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
-            # Then add a plot for the learning curve and use st.pyplot
+            # Compute learning- and validation-curve data, with scoring chosen by problem type
+            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error', n_jobs=-1)
+
+            # Aggregate the cross-validated scores into means and standard deviations
             train_mean = np.mean(train_scores, axis=1)
             train_std = np.std(train_scores, axis=1)
             valid_mean = np.mean(valid_scores, axis=1)
             valid_std = np.std(valid_scores, axis=1)

-            fig_lc, ax_lc = plt.subplots() # plot the curve in matplotlib
-            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training neg_mean_squared_error')
+            # Plot the curves with a one-standard-deviation band
+            fig_lc, ax_lc = plt.subplots()
+            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
             ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
-            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation neg_mean_squared_error')
+            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
             ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')

             ax_lc.set_title('Learning Curves')
             ax_lc.set_xlabel('Training Set Size')
-            ax_lc.set_ylabel('Neg Mean Squared Error')
+            ax_lc.set_ylabel('Score')
             ax_lc.legend(loc='best')
-            st.pyplot(fig_lc) # Display the figure in Streamlit
-            importances = model.feature_importances_ # Assumed tree-based model
-            feat_importances = pd.Series(importances, index=X_train.columns)
-            feat_importances = feat_importances.nlargest(20)
-
-            fig_feat, ax_feat = plt.subplots()
-            feat_importances.plot(kind='barh', ax=ax_feat)
-            ax_feat.set_xlabel('Relative Importance')
-            ax_feat.set_ylabel('Features')
-            ax_feat.set_title('Feature Importances')
-            st.pyplot(fig_feat)
+            st.pyplot(fig_lc)

     except Exception as e: # Local error
-        st.write(f"Plotting functions requires tree based-models and for classification: {e}")
+        st.write(f"Visuals are only available for tree-based models once a model is selected: {e}")

 except Exception as e:
     st.error(f"An error occurred: {e}")
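This last hunk merges the separate classification and regression plotting branches into one, switching the `scoring` argument on `problem_type` and dropping the duplicated feature-importance code. A self-contained sketch of the consolidated learning-curve logic, using stand-in data and a stand-in model (the app would render the figure with `st.pyplot`):

```python
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import learning_curve

X, y = make_classification(n_samples=300, random_state=42)  # stand-in data
problem_type = "Classification"

# Scoring switches on problem type, mirroring the consolidated branch above.
scoring = "accuracy" if problem_type == "Classification" else "neg_mean_squared_error"
train_sizes, train_scores, valid_scores = learning_curve(
    GradientBoostingClassifier(random_state=42), X, y, cv=5, scoring=scoring, n_jobs=-1
)

# Mean and spread of the cross-validated scores at each training-set size.
train_mean, valid_mean = train_scores.mean(axis=1), valid_scores.mean(axis=1)
train_std, valid_std = train_scores.std(axis=1), valid_scores.std(axis=1)

fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, marker="o", label="Training")
ax.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.15)
ax.plot(train_sizes, valid_mean, marker="s", linestyle="--", label="Validation")
ax.fill_between(train_sizes, valid_mean - valid_std, valid_mean + valid_std, alpha=0.15)
ax.set_xlabel("Training Set Size")
ax.set_ylabel("Score")
ax.legend(loc="best")
# in the app: st.pyplot(fig)
```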
 