CosmickVisions committed
Commit 576736c · verified · 1 Parent(s): 1465dbd

Update app.py

Files changed (1)
  1. app.py +33 -27
app.py CHANGED
@@ -603,25 +603,43 @@ elif app_mode == "Model Training":
     # Feature Selection
     feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose features for training.")
 
-    # Model Selection
-    model_name = st.selectbox("Select Model", [
-        "Linear Regression", "Logistic Regression", "Decision Tree",
-        "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN"  # Expanded models
-    ], help="Choose a model.")
+    # Model Selection - Dynamic based on Problem Type
+    if problem_type == "Regression":
+        model_options = ["Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM"]
+    else:  # Classification
+        model_options = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN"]
+
+    model_name = st.selectbox("Select Model", model_options, help="Choose a model.")
 
     feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
 
-    if model_name == "Random Forest" and feature_columns:  # Check that Random Forest and features are selected
-        min_features = 1  # Ensure at least one feature is used
-        max_features = len(feature_columns) if len(feature_columns) > 0 else 1  # Use 1 if no features are selected
+    # Hyperparameter Tuning - Dynamic based on Model Selection
+    param_grid = {}  # Initialize to an empty dictionary
+    if model_name == "Random Forest":
+        st.subheader("Random Forest Hyperparameters")
         param_grid = {
-            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="n_estimators1"), st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="n_estimators2") + 1)),
+            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="n_estimators"), st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="n_estimators2") + 1)),
             'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="max_depth1"), st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="max_depth2") + 1)),
             'min_samples_split': list(range(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key="min_samples_split1"), st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key="min_samples_split2") + 1)),  # New hyperparameter
             'min_samples_leaf': list(range(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key="min_samples_leaf1"), st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key="min_samples_leaf2") + 1)),  # New hyperparameter
         }
 
-    # Train-Test Split
+    elif model_name == "Gradient Boosting":
+        st.subheader("Gradient Boosting Hyperparameters")
+        param_grid = {
+            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the ensemble.", key="gb_n_estimators1"), st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the ensemble.", key="gb_n_estimators2") + 1)),
+            'learning_rate': [st.slider("Learning Rate", 0.01, 1.0, 0.1, step=0.01, help="Learning rate", key='gb_learning_rate')],  # Example; add more values as needed
+            'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="gb_max_depth1"), st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="gb_max_depth2") + 1)),
+        }
+
+    elif model_name == "Decision Tree":
+        st.subheader("Decision Tree Hyperparameters")
+        param_grid = {
+            'criterion': [st.selectbox("Criterion", ["gini", "entropy"], help="Splitting criterion")],  # wrapped in a list so GridSearchCV gets an iterable of values
+            'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="dt_max_depth1"), st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="dt_max_depth2") + 1)),
+        }
+
+    # Train-Test Split
     test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
 
     if st.button("Train Model"):
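Note on the grid construction above: each range is built from a pair of sliders (lower bound keyed "…1", upper bound keyed "…2") expanded with list(range(lo, hi + 1)), so a wide gap between the two sliders multiplies the number of GridSearchCV candidates quickly. A minimal sketch of the same idea using one Streamlit range slider per hyperparameter; range_param is a hypothetical helper, not part of app.py:

    import streamlit as st

    def range_param(label, lo, hi, default, key):
        # A tuple default makes st.slider return a (start, end) pair.
        start, end = st.slider(label, lo, hi, default, key=key)
        return list(range(start, end + 1))

    # Illustrative usage mirroring the Random Forest grid above.
    param_grid = {
        'n_estimators': range_param("Number of Estimators", 10, 200, (80, 120), key="rf_n_estimators"),
        'max_depth': range_param("Max Depth", 3, 20, (5, 10), key="rf_max_depth"),
    }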
@@ -673,33 +691,27 @@ elif app_mode == "Model Training":
             # Model Training and Hyperparameter Tuning
             if model_name == "Linear Regression":
                 model = LinearRegression()
-                model.fit(X_train_selected, y_train)
-
             elif model_name == "Logistic Regression":
                 model = LogisticRegression(max_iter=1000)
-                model.fit(X_train_selected, y_train)
             elif model_name == "Decision Tree":
                 if problem_type == "Regression":
                     model = DecisionTreeRegressor()
-                    model.fit(X_train_selected, y_train)
                 else:
                     model = DecisionTreeClassifier()
-                    model.fit(X_train_selected, y_train)
             elif model_name == "Random Forest":
                 if problem_type == "Regression":
                     model = RandomForestRegressor(random_state=42)
-                    if 'param_grid' in locals():
+                    if 'param_grid' in locals() and param_grid:  # added "param_grid not empty" condition
                         grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error')  # Example scoring
                         grid_search.fit(X_train_selected, y_train)
                         model = grid_search.best_estimator_
                         st.write("Best Parameters:", grid_search.best_params_)
                     else:
-                        model = RandomForestRegressor(random_state=42)  # define if no param_grid
                         model.fit(X_train_selected, y_train)  # fit without grid search if param_grid is not defined
 
                 else:
                     model = RandomForestClassifier(random_state=42)
-                    if 'param_grid' in locals():
+                    if 'param_grid' in locals() and param_grid:  # added "param_grid not empty" condition
                         grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
                         grid_search.fit(X_train_selected, y_train)
                         model = grid_search.best_estimator_
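Because the new code initializes param_grid = {} before the model branches, 'param_grid' in locals() is now always true; the appended "and param_grid" truthiness test does the actual work. A minimal sketch of the guard on its own, assuming param_grid, X_train_selected, y_train, and st (Streamlit) are defined as earlier in app.py:

    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import GridSearchCV

    model = RandomForestClassifier(random_state=42)
    if param_grid:  # an empty dict is falsy, so no locals() check is needed
        grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
        grid_search.fit(X_train_selected, y_train)
        model = grid_search.best_estimator_
        st.write("Best Parameters:", grid_search.best_params_)
    else:
        model.fit(X_train_selected, y_train)  # plain fit when no grid was built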
@@ -760,7 +772,7 @@ elif app_mode == "Model Training":
                 # Confusion Matrix
 
                 conf_matrix = confusion_matrix(y_test, y_pred)
-
+
                 # Assuming conf_matrix is your confusion matrix
                 fig_conf, ax_conf = plt.subplots()
                 sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)  # Corrected named argument
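For reference, the heatmap call above in self-contained form; the toy labels and the axis labels are illustrative, standing in for the app's y_test and y_pred:

    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.metrics import confusion_matrix

    y_true = [0, 1, 1, 0, 1, 0]  # toy data, not from the app
    y_pred = [0, 1, 0, 0, 1, 1]
    conf_matrix = confusion_matrix(y_true, y_pred)

    fig_conf, ax_conf = plt.subplots()
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
    ax_conf.set_xlabel('Predicted')
    ax_conf.set_ylabel('Actual')
    ax_conf.set_title('Confusion Matrix')
    plt.show()  # the app renders with st.pyplot(fig_conf) instead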
@@ -769,14 +781,14 @@ elif app_mode == "Model Training":
                 ax_conf.set_title('Confusion Matrix')
                 st.pyplot(fig_conf)
 
+
                 # Added section for model visualization
                 st.subheader("Model Visualization")
                 # Execute only after a model has been trained, never outside of it.
                 if st.session_state.model is not None:
                     try:  # All the plotting code goes here.
                         if model_name in ["Random Forest", "Gradient Boosting"]:  # tree-based models
-
-                            # Make sure you use this inside of a conditional for classification, model, and tree based model.
+                            # Keep this inside the conditional for tree-based models.
 
                             # Feature Importance (Tree-based Models)
 
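The "Feature Importance (Tree-based Models)" placeholder refers to the feature_importances_ attribute that fitted RandomForest* and GradientBoosting* estimators expose. The plotting code itself is outside this diff; a sketch under the assumption that model is the fitted estimator and feature_columns the selected feature names (fig_fi and ax_fi are illustrative names, not the app's):

    import numpy as np
    import matplotlib.pyplot as plt

    importances = model.feature_importances_  # available on fitted RF/GB models
    order = np.argsort(importances)[::-1]     # largest first

    fig_fi, ax_fi = plt.subplots()
    ax_fi.bar(np.array(feature_columns)[order], importances[order])
    ax_fi.set_ylabel('Importance')
    ax_fi.set_title('Feature Importance')
    ax_fi.tick_params(axis='x', rotation=45)
    st.pyplot(fig_fi)  # st is Streamlit, as imported in app.py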
@@ -794,7 +806,6 @@ elif app_mode == "Model Training":
                             # Build the data for the learning and validation curves
                             train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error', n_jobs=-1)  # cross-validated runs
 
-
                             # Summary statistics from the generated scores
                             train_mean = np.mean(train_scores, axis=1)
                             train_std = np.std(train_scores, axis=1)
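For context, the learning-curve section assembled in one piece, with the valid_* statistics that mirror the train_* ones. Variable names follow the diff where it shows them; the plot body is a reconstruction assuming a classification problem with model, X_train_selected, and y_train defined as above:

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.model_selection import learning_curve

    train_sizes, train_scores, valid_scores = learning_curve(
        model, X_train_selected, y_train, cv=5, scoring='accuracy', n_jobs=-1)

    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    valid_mean = np.mean(valid_scores, axis=1)
    valid_std = np.std(valid_scores, axis=1)

    fig_lc, ax_lc = plt.subplots()
    ax_lc.plot(train_sizes, train_mean, 'o-', label='Training score')
    ax_lc.plot(train_sizes, valid_mean, 'o-', label='Cross-validation score')
    ax_lc.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1)
    ax_lc.fill_between(train_sizes, valid_mean - valid_std, valid_mean + valid_std, alpha=0.1)
    ax_lc.set_xlabel('Training examples')
    ax_lc.set_ylabel('Score')
    ax_lc.legend(loc='best')
    st.pyplot(fig_lc)  # st is Streamlit, as in app.py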
@@ -815,13 +826,9 @@ elif app_mode == "Model Training":
                             ax_lc.legend(loc='best')
                             st.pyplot(fig_lc)
 
-
                     except Exception as e:  # local error
                         st.write(f"Visuals are only available for tree-based models, or a model must be selected first: {e}")
 
-
-
-
             except Exception as e:
                 st.error(f"An error occurred: {e}")
 
@@ -862,7 +869,6 @@ elif app_mode == "Model Training":
             st.write(f"Accuracy: {accuracy:.4f}")
         except Exception as e:  # local error
             st.error(f"An error occurred during model evaluation: {e}")
-
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")
 