CosmickVisions committed on
Commit
95e0685
·
verified ·
1 Parent(s): c1c0798

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -52
app.py CHANGED
@@ -588,7 +588,6 @@ elif app_mode == "Advanced EDA":
588
  except Exception as e:
589
  st.error(f"An error occurred during the T-test: {e}")
590
 
591
- #MODEL TRAINING
592
  elif app_mode == "Model Training":
593
  st.title("🚂 Model Training")
594
 
@@ -612,9 +611,6 @@ elif app_mode == "Model Training":
612
 
613
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
614
 
615
- # Hyperparameter tuning
616
- tuning_method = st.selectbox("Hyperparameter Tuning Method",["Grid Search","Bayesian Optimization","None"])
617
-
618
  if model_name == "Random Forest" and feature_columns: # Check if Random Forest and features are selected
619
  min_features = 1 # Ensure at least one feature is used
620
  max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
@@ -674,41 +670,43 @@ elif app_mode == "Model Training":
674
  X_train_selected = X_train_processed
675
  X_test_selected = X_test_processed
676
 
677
- # Model Training and Hyperparameter Tuning
678
  if model_name == "Linear Regression":
679
  model = LinearRegression()
 
 
680
  elif model_name == "Logistic Regression":
681
  model = LogisticRegression(max_iter=1000)
 
682
  elif model_name == "Decision Tree":
683
  if problem_type == "Regression":
684
  model = DecisionTreeRegressor()
 
685
  else:
686
  model = DecisionTreeClassifier()
 
687
  elif model_name == "Random Forest":
688
- if tuning_method == "Bayesian Optimization":
689
- st.write("Implementing this function to be added soon")
690
- elif problem_type == "Regression":
691
- model = RandomForestRegressor(random_state=42)
692
- if 'param_grid' in locals():
693
- grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
694
- grid_search.fit(X_train_selected, y_train)
695
- model = grid_search.best_estimator_
696
- st.write("Best Parameters:", grid_search.best_params_)
697
- else:
698
- model = RandomForestRegressor(random_state=42) #define if no param_grid
699
- model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
700
-
701
-
702
- else:
703
- model = RandomForestClassifier(random_state=42)
704
- if 'param_grid' in locals():
705
- grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
706
- grid_search.fit(X_train_selected, y_train)
707
- model = grid_search.best_estimator_
708
- st.write("Best Parameters:", grid_search.best_params_)
709
- else:
710
- model = RandomForestClassifier(random_state=42) #define if no param_grid
711
- model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
712
  elif model_name == "Gradient Boosting":
713
  from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
714
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
@@ -727,10 +725,6 @@ elif app_mode == "Model Training":
727
 
728
  # Store model and preprocessor
729
  st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
730
- st.session_state.preprocessor = preprocessor
731
-
732
- # Store model and preprocessor
733
- st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
734
  st.session_state.preprocessor = preprocessor
735
 
736
  # Model Evaluation
@@ -767,9 +761,9 @@ elif app_mode == "Model Training":
767
 
768
  conf_matrix = confusion_matrix(y_test, y_pred)
769
 
770
- # Assuming conf_matrix is your confusion matrix
771
  fig_conf, ax_conf = plt.subplots()
772
- sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
773
  ax_conf.set_xlabel('Predicted Labels')
774
  ax_conf.set_ylabel('True Labels')
775
  ax_conf.set_title('Confusion Matrix')
@@ -778,31 +772,38 @@ elif app_mode == "Model Training":
778
 
779
  #Added section for model visualization
780
  st.subheader("Model Visualization")
 
 
 
 
 
 
781
 
782
- try: #All the plotting code here.
783
- if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
784
- #Feature Importance (Tree-based Models) and model selected was good
785
- importances = model.feature_importances_ # Assumed tree-based model
786
- feat_importances = pd.Series(importances, index=X_train.columns)
787
- feat_importances = feat_importances.nlargest(20)
788
 
789
- fig_feat, ax_feat = plt.subplots()
790
- feat_importances.plot(kind='barh', ax=ax_feat)
791
- ax_feat.set_xlabel('Relative Importance')
792
- ax_feat.set_ylabel('Features')
793
- ax_feat.set_title('Feature Importances')
794
- st.pyplot(fig_feat)
795
 
 
 
 
 
 
 
796
 
797
- #Create data that determines the learning and validation curve and what we have to add
798
  train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
799
 
800
- #Then add a plot for the learning curve and use st.pyplot
 
801
  train_mean = np.mean(train_scores, axis=1)
802
  train_std = np.std(train_scores, axis=1)
803
  valid_mean = np.mean(valid_scores, axis=1)
804
  valid_std = np.std(valid_scores, axis=1)
805
 
 
 
806
  fig_lc, ax_lc = plt.subplots()
807
  ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
808
  ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
@@ -815,8 +816,12 @@ elif app_mode == "Model Training":
815
  ax_lc.legend(loc='best')
816
  st.pyplot(fig_lc)
817
 
818
- except Exception as e: #Local error
819
- st.write(f"Visuals are only available for tree based models or if models are selected prior: {e}")
 
 
 
 
820
 
821
  except Exception as e:
822
  st.error(f"An error occurred: {e}")
@@ -856,7 +861,6 @@ elif app_mode == "Model Training":
856
  from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
857
  accuracy = accuracy_score(y_test, y_pred)
858
  st.write(f"Accuracy: {accuracy:.4f}")
859
-
860
  except Exception as e: #local error
861
  st.error(f"An error occurred during model evaluation: {e}")
862
 
 
588
  except Exception as e:
589
  st.error(f"An error occurred during the T-test: {e}")
590
 
 
591
  elif app_mode == "Model Training":
592
  st.title("🚂 Model Training")
593
 
 
611
 
612
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
613
 
 
 
 
614
  if model_name == "Random Forest" and feature_columns: # Check if Random Forest and features are selected
615
  min_features = 1 # Ensure at least one feature is used
616
  max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
 
670
  X_train_selected = X_train_processed
671
  X_test_selected = X_test_processed
672
 
673
+ # Model Training and Hyperparameter Tuning
674
  if model_name == "Linear Regression":
675
  model = LinearRegression()
676
+ model.fit(X_train_selected, y_train)
677
+
678
  elif model_name == "Logistic Regression":
679
  model = LogisticRegression(max_iter=1000)
680
+ model.fit(X_train_selected, y_train)
681
  elif model_name == "Decision Tree":
682
  if problem_type == "Regression":
683
  model = DecisionTreeRegressor()
684
+ model.fit(X_train_selected, y_train)
685
  else:
686
  model = DecisionTreeClassifier()
687
+ model.fit(X_train_selected, y_train)
688
  elif model_name == "Random Forest":
689
+ if problem_type == "Regression":
690
+ model = RandomForestRegressor(random_state=42)
691
+ if 'param_grid' in locals():
692
+ grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
693
+ grid_search.fit(X_train_selected, y_train)
694
+ model = grid_search.best_estimator_
695
+ st.write("Best Parameters:", grid_search.best_params_)
696
+ else:
697
+ model = RandomForestRegressor(random_state=42) #define if no param_grid
698
+ model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
699
+
700
+ else:
701
+ model = RandomForestClassifier(random_state=42)
702
+ if 'param_grid' in locals():
703
+ grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
704
+ grid_search.fit(X_train_selected, y_train)
705
+ model = grid_search.best_estimator_
706
+ st.write("Best Parameters:", grid_search.best_params_)
707
+ else:
708
+ model = RandomForestClassifier(random_state=42) #define if no param_grid
709
+ model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
 
 
 
710
  elif model_name == "Gradient Boosting":
711
  from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
712
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
 
725
 
726
  # Store model and preprocessor
727
  st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
 
 
 
 
728
  st.session_state.preprocessor = preprocessor
729
 
730
  # Model Evaluation
 
761
 
762
  conf_matrix = confusion_matrix(y_test, y_pred)
763
 
764
+ #Heatmap
765
  fig_conf, ax_conf = plt.subplots()
766
+ sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)  # draw on the axes created by plt.subplots() above
767
  ax_conf.set_xlabel('Predicted Labels')
768
  ax_conf.set_ylabel('True Labels')
769
  ax_conf.set_title('Confusion Matrix')
 
772
 
773
  #Added section for model visualization
774
  st.subheader("Model Visualization")
775
+ # Only render the visualizations once a trained model has been stored in session state.
776
+ if st.session_state.model is not None: #Make sure that everything only executes when data set is trained and not outside of it.
777
+ try: #All the plotting code here.
778
+ if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
779
+
780
+ # Keep this inside the model_name check above: feature_importances_ is assumed to exist only on these tree-based models.
781
 
782
+ #Feature Importance (Tree-based Models)
 
 
 
 
 
783
 
784
+ importances = model.feature_importances_ # Assumed tree-based model
785
+ feat_importances = pd.Series(importances, index=X_train.columns)
786
+ feat_importances = feat_importances.nlargest(20)
 
 
 
787
 
788
+ fig_feat, ax_feat = plt.subplots()
789
+ feat_importances.plot(kind='barh', ax=ax_feat)
790
+ ax_feat.set_xlabel('Relative Importance')
791
+ ax_feat.set_ylabel('Features')
792
+ ax_feat.set_title('Feature Importances')
793
+ st.pyplot(fig_feat)
794
 
795
+ # Compute the learning-curve data: cross-validated training and validation scores at increasing training-set sizes.
796
  train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
797
 
798
+
799
+ # Mean and standard deviation of the scores across the cross-validation folds, per training-set size.
800
  train_mean = np.mean(train_scores, axis=1)
801
  train_std = np.std(train_scores, axis=1)
802
  valid_mean = np.mean(valid_scores, axis=1)
803
  valid_std = np.std(valid_scores, axis=1)
804
 
805
+ # Plot the mean scores against training-set size, with a shaded band of +/- one standard deviation.
806
+
807
  fig_lc, ax_lc = plt.subplots()
808
  ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training ' + ('Accuracy' if problem_type == "Classification" else "Neg MSE"))
809
  ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
 
816
  ax_lc.legend(loc='best')
817
  st.pyplot(fig_lc)
818
 
819
+
820
+ except Exception as e: #Local error
821
+ st.write(f"Visuals are only available for tree based models or if models are selected prior: {e}") #Write only if error
822
+
823
+
824
+
825
 
826
  except Exception as e:
827
  st.error(f"An error occurred: {e}")
 
861
  from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
862
  accuracy = accuracy_score(y_test, y_pred)
863
  st.write(f"Accuracy: {accuracy:.4f}")
 
864
  except Exception as e: #local error
865
  st.error(f"An error occurred during model evaluation: {e}")
866