CosmickVisions committed on
Commit
d429dc4
·
verified ·
1 Parent(s): 8d6760a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -24
app.py CHANGED
@@ -594,6 +594,14 @@ elif app_mode == "Model Training":
594
  if st.session_state.cleaned_data is not None:
595
  df = st.session_state.cleaned_data.copy()
596
 
 
 
 
 
 
 
 
 
597
  # Target Variable Selection
598
  target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
599
 
@@ -613,30 +621,35 @@ elif app_mode == "Model Training":
613
 
614
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
615
 
616
- # Hyperparameter Tuning - Dynamic based on Model Selection
617
  param_grid = {} # Initialize to empty dictionary
 
 
 
618
  if model_name == "Random Forest":
619
  st.subheader("Random Forest Hyperparameters")
620
  param_grid = {
621
- 'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="n_estimators"),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key = "n_estimators2")+1))),
622
- 'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "max_depth2")+1))),
623
- 'min_samples_split': list(range(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key="min_samples_split1"),(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key = "min_samples_split2")+1))), #New hyperparameter
624
- 'min_samples_leaf': list(range(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key="min_samples_leaf1"),(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key = "min_samples_leaf2")+1))), #New hyperparameter
 
625
  }
626
 
627
  elif model_name == "Gradient Boosting":
628
- st.subheader("Gradient Boosting Hyperparameters")
629
- param_grid = {
630
- 'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="gb_n_estimators1"),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key = "gb_n_estimators2")+1))),
631
- 'learning_rate': [st.slider("Learning Rate", 0.01, 1.0, 0.1, step=0.01, help="Learning rate", key = 'gb_learning_rate')], # Example, add more
632
- 'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="gb_max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "gb_max_depth2")+1))),
 
633
  }
634
 
635
  elif model_name == "Decision Tree":
636
  st.subheader("Decision Tree Hyperparameters")
637
  param_grid = {
638
- 'criterion': st.selectbox("Criterion", ["gini", "entropy"], help="Splitting criterion"),
639
- 'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="dt_max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "dt_max_depth2")+1))),
640
  }
641
 
642
  # Train-Test Split
@@ -691,13 +704,18 @@ elif app_mode == "Model Training":
691
  # Model Training and Hyperparameter Tuning
692
  if model_name == "Linear Regression":
693
  model = LinearRegression()
 
 
694
  elif model_name == "Logistic Regression":
695
  model = LogisticRegression(max_iter=1000)
 
696
  elif model_name == "Decision Tree":
697
  if problem_type == "Regression":
698
  model = DecisionTreeRegressor()
 
699
  else:
700
  model = DecisionTreeClassifier()
 
701
  elif model_name == "Random Forest":
702
  if problem_type == "Regression":
703
  model = RandomForestRegressor(random_state=42)
@@ -707,6 +725,7 @@ elif app_mode == "Model Training":
707
  model = grid_search.best_estimator_
708
  st.write("Best Parameters:", grid_search.best_params_)
709
  else:
 
710
  model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
711
 
712
  else:
@@ -739,6 +758,12 @@ elif app_mode == "Model Training":
739
  st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
740
  st.session_state.preprocessor = preprocessor
741
 
 
 
 
 
 
 
742
  # Model Evaluation
743
  y_pred = model.predict(X_test_selected)
744
  if problem_type == "Regression":
@@ -773,9 +798,9 @@ elif app_mode == "Model Training":
773
 
774
  conf_matrix = confusion_matrix(y_test, y_pred)
775
 
776
- # Assuming conf_matrix is your confusion matrix
777
  fig_conf, ax_conf = plt.subplots()
778
- sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf) # Corrected named argument
779
  ax_conf.set_xlabel('Predicted Labels')
780
  ax_conf.set_ylabel('True Labels')
781
  ax_conf.set_title('Confusion Matrix')
@@ -788,7 +813,7 @@ elif app_mode == "Model Training":
788
  if st.session_state.model is not None: #Make sure that everything only executes when data set is trained and not outside of it.
789
  try: #All the plotting code here.
790
  if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
791
- #Make sure you use this inside of a conditional for classification, model, and tree based model.
792
 
793
  #Feature Importance (Tree-based Models)
794
 
@@ -806,7 +831,7 @@ elif app_mode == "Model Training":
806
  #Compute training and validation scores at increasing training-set sizes for the learning curve
807
  train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
808
 
809
- #Take and define what this is for from the results that has been generated
810
  train_mean = np.mean(train_scores, axis=1)
811
  train_std = np.std(train_scores, axis=1)
812
  valid_mean = np.mean(valid_scores, axis=1)
@@ -852,20 +877,19 @@ elif app_mode == "Model Training":
852
  except Exception as e:
853
  st.error(f"Error loading model: {e}")
854
 
855
- #Model Evaluation Section
856
- if 'X_test' in locals() and st.session_state.model is not None:
857
- try: #Error catching with new test data
858
-
859
- y_pred = st.session_state.model.predict(X_test)
860
 
861
  if problem_type == "Regression":
862
- mse = mean_squared_error(y_test, y_pred)
863
- r2 = r2_score(y_test, y_pred)
864
  st.write(f"Mean Squared Error: {mse:.4f}")
865
  st.write(f"R-squared: {r2:.4f}")
866
  else:
867
  from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
868
- accuracy = accuracy_score(y_test, y_pred)
869
  st.write(f"Accuracy: {accuracy:.4f}")
870
  except Exception as e: #local error
871
  st.error(f"An error occurred during model evaluation: {e}")
 
594
  if st.session_state.cleaned_data is not None:
595
  df = st.session_state.cleaned_data.copy()
596
 
597
+ # Initialize session state for train/test split
598
+ if 'X_train_selected' not in st.session_state:
599
+ st.session_state.X_train_selected = None
600
+ st.session_state.X_test_selected = None
601
+ st.session_state.y_train = None
602
+ st.session_state.y_test = None
603
+ st.session_state.model = None # Initialize model in session state
604
+
605
  # Target Variable Selection
606
  target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
607
 
 
621
 
622
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
623
 
624
+ # Hyperparameter Tuning - Dynamic based on Model Selection
625
  param_grid = {} # Initialize to empty dictionary
626
+
627
+ #Define fixed parameter values for the model so it works. These are not optimized numbers
628
+ #The goal is to make sure that all visualizations and graphs work as is.
629
  if model_name == "Random Forest":
630
  st.subheader("Random Forest Hyperparameters")
631
  param_grid = {
632
+ 'n_estimators': list(range(100, 101)), #Used 100 so model is trained and not empty and all visuals work
633
+
634
+ 'max_depth': list(range(10,11)), #default value 10 so its in model
635
+ 'min_samples_split': list(range(2,3)), #New hyperparameter default 2
636
+ 'min_samples_leaf': list(range(1,2)), #New hyperparameter default 1
637
  }
638
 
639
  elif model_name == "Gradient Boosting":
640
+ st.subheader("Gradient Boosting Hyperparameters")
641
+ param_grid = {
642
+ 'n_estimators': list(range(100, 101)),
643
+ 'learning_rate': [0.1],
644
+ 'max_depth': list(range(3,4))
645
+
646
  }
647
 
648
  elif model_name == "Decision Tree":
649
  st.subheader("Decision Tree Hyperparameters")
650
  param_grid = {
651
+ 'criterion': ["gini"],
652
+ 'max_depth': list(range(3,4)),
653
  }
654
 
655
  # Train-Test Split
 
704
  # Model Training and Hyperparameter Tuning
705
  if model_name == "Linear Regression":
706
  model = LinearRegression()
707
+ model.fit(X_train_selected, y_train)
708
+
709
  elif model_name == "Logistic Regression":
710
  model = LogisticRegression(max_iter=1000)
711
+ model.fit(X_train_selected, y_train)
712
  elif model_name == "Decision Tree":
713
  if problem_type == "Regression":
714
  model = DecisionTreeRegressor()
715
+ model.fit(X_train_selected, y_train)
716
  else:
717
  model = DecisionTreeClassifier()
718
+ model.fit(X_train_selected, y_train)
719
  elif model_name == "Random Forest":
720
  if problem_type == "Regression":
721
  model = RandomForestRegressor(random_state=42)
 
725
  model = grid_search.best_estimator_
726
  st.write("Best Parameters:", grid_search.best_params_)
727
  else:
728
+ model = RandomForestRegressor(random_state=42) #define if no param_grid
729
  model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
730
 
731
  else:
 
758
  st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
759
  st.session_state.preprocessor = preprocessor
760
 
761
+ #Store the test data
762
+ st.session_state.X_train_selected = X_train_selected
763
+ st.session_state.X_test_selected = X_test_selected
764
+ st.session_state.y_train = y_train
765
+ st.session_state.y_test = y_test
766
+
767
  # Model Evaluation
768
  y_pred = model.predict(X_test_selected)
769
  if problem_type == "Regression":
 
798
 
799
  conf_matrix = confusion_matrix(y_test, y_pred)
800
 
801
+ #Heatmap
802
  fig_conf, ax_conf = plt.subplots()
803
+ sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
804
  ax_conf.set_xlabel('Predicted Labels')
805
  ax_conf.set_ylabel('True Labels')
806
  ax_conf.set_title('Confusion Matrix')
 
813
  if st.session_state.model is not None: #Make sure that everything only executes when data set is trained and not outside of it.
814
  try: #All the plotting code here.
815
  if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
816
+ #Make sure you use this inside of a conditional for classification, model, and tree based model.
817
 
818
  #Feature Importance (Tree-based Models)
819
 
 
831
  #Compute training and validation scores at increasing training-set sizes for the learning curve
832
  train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
833
 
834
+ #Then add a plot for the learning curve and use st.pyplot
835
  train_mean = np.mean(train_scores, axis=1)
836
  train_std = np.std(train_scores, axis=1)
837
  valid_mean = np.mean(valid_scores, axis=1)
 
877
  except Exception as e:
878
  st.error(f"Error loading model: {e}")
879
 
880
+ #Model Evaluation Section - run on the saved model
881
+ if st.session_state.model is not None and st.session_state.X_test_selected is not None: # added check to make sure it is a loaded model
882
+ try:
883
+ y_pred = st.session_state.model.predict(st.session_state.X_test_selected) # load from stored
 
884
 
885
  if problem_type == "Regression":
886
+ mse = mean_squared_error(st.session_state.y_test, y_pred)
887
+ r2 = r2_score(st.session_state.y_test, y_pred)
888
  st.write(f"Mean Squared Error: {mse:.4f}")
889
  st.write(f"R-squared: {r2:.4f}")
890
  else:
891
  from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
892
+ accuracy = accuracy_score(st.session_state.y_test, y_pred)
893
  st.write(f"Accuracy: {accuracy:.4f}")
894
  except Exception as e: #local error
895
  st.error(f"An error occurred during model evaluation: {e}")