Update app.py

app.py CHANGED
@@ -603,25 +603,43 @@ elif app_mode == "Model Training":
     # Feature Selection
     feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose features for training.")
 
-    # Model Selection
-
-        "Linear Regression", "
-
-
+    # Model Selection - Dynamic based on Problem Type
+    if problem_type == "Regression":
+        model_options = ["Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM"]
+    else: # Classification
+        model_options = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN"]
+
+    model_name = st.selectbox("Select Model", model_options, help="Choose a model.")
 
     feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
 
-
-
-
+    # Hyperparameter Tuning - Dynamic based on Model Selection
+    param_grid = {} # Initialize to empty dictionary
+    if model_name == "Random Forest":
+        st.subheader("Random Forest Hyperparameters")
         param_grid = {
-            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="
+            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="n_estimators"),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key = "n_estimators2")+1))),
             'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "max_depth2")+1))),
             'min_samples_split': list(range(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key="min_samples_split1"),(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node", key = "min_samples_split2")+1))), #New hyperparameter
             'min_samples_leaf': list(range(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key="min_samples_leaf1"),(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node", key = "min_samples_leaf2")+1))), #New hyperparameter
         }
 
-
+    elif model_name == "Gradient Boosting":
+        st.subheader("Gradient Boosting Hyperparameters")
+        param_grid = {
+            'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key="gb_n_estimators1"),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.", key = "gb_n_estimators2")+1))),
+            'learning_rate': [st.slider("Learning Rate", 0.01, 1.0, 0.1, step=0.01, help="Learning rate", key = 'gb_learning_rate')], # Example, add more
+            'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="gb_max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "gb_max_depth2")+1))),
+        }
+
+    elif model_name == "Decision Tree":
+        st.subheader("Decision Tree Hyperparameters")
+        param_grid = {
+            'criterion': st.selectbox("Criterion", ["gini", "entropy"], help="Splitting criterion"),
+            'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key="dt_max_depth1"),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.", key = "dt_max_depth2")+1))),
+        }
+
+    # Train-Test Split
     test_size = st.slider("Test Size", 0.1, 0.5, 0.2, help="Proportion of the dataset to include in the test split.")
 
     if st.button("Train Model"):

@@ -673,33 +691,27 @@ elif app_mode == "Model Training":
         # Model Training and Hyperparameter Tuning
         if model_name == "Linear Regression":
             model = LinearRegression()
-            model.fit(X_train_selected, y_train)
-
         elif model_name == "Logistic Regression":
             model = LogisticRegression(max_iter=1000)
-            model.fit(X_train_selected, y_train)
         elif model_name == "Decision Tree":
             if problem_type == "Regression":
                 model = DecisionTreeRegressor()
-                model.fit(X_train_selected, y_train)
             else:
                 model = DecisionTreeClassifier()
-                model.fit(X_train_selected, y_train)
         elif model_name == "Random Forest":
             if problem_type == "Regression":
                 model = RandomForestRegressor(random_state=42)
-                if 'param_grid' in locals():
+                if 'param_grid' in locals() and param_grid: #added param_grid not empty condition
                     grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error') # Example scoring
                     grid_search.fit(X_train_selected, y_train)
                     model = grid_search.best_estimator_
                     st.write("Best Parameters:", grid_search.best_params_)
                 else:
-                    model = RandomForestRegressor(random_state=42) #define if no param_grid
                     model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
 
             else:
                 model = RandomForestClassifier(random_state=42)
-                if 'param_grid' in locals():
+                if 'param_grid' in locals()and param_grid: #added param_grid not empty condition
                     grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
                     grid_search.fit(X_train_selected, y_train)
                     model = grid_search.best_estimator_

@@ -760,7 +772,7 @@ elif app_mode == "Model Training":
         #Confusion Matrix
 
         conf_matrix = confusion_matrix(y_test, y_pred)
-
+
         # Assuming conf_matrix is your confusion matrix
         fig_conf, ax_conf = plt.subplots()
         sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf) # Corrected named argument

@@ -769,14 +781,14 @@ elif app_mode == "Model Training":
         ax_conf.set_title('Confusion Matrix')
         st.pyplot(fig_conf)
 
+
         #Added section for model visualization
         st.subheader("Model Visualization")
         #Use conditional to make sure that everything only executes when the data set is trained and not outside of it.
         if st.session_state.model is not None: #Make sure that everything only executes when data set is trained and not outside of it.
             try: #All the plotting code here.
                 if model_name in ["Random Forest", "Gradient Boosting"]:#Used list to define models.
-
-                #Make sure you use this inside of a conditional for classification, model, and tree based model.
+                    #Make sure you use this inside of a conditional for classification, model, and tree based model.
 
                     #Feature Importance (Tree-based Models)
 

@@ -794,7 +806,6 @@ elif app_mode == "Model Training":
                     #Create data that determines the learning and validation curve and what we have to add
                     train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type =="Classification" else 'neg_mean_squared_error', n_jobs=-1) #Define cross validation for run
 
-
                     #Take and define what this is for from the results that has been generated
                     train_mean = np.mean(train_scores, axis=1)
                     train_std = np.std(train_scores, axis=1)

@@ -815,13 +826,9 @@ elif app_mode == "Model Training":
                     ax_lc.legend(loc='best')
                     st.pyplot(fig_lc)
 
-
             except Exception as e: #Local error
                 st.write(f"Visuals are only available for tree based models or if models are selected prior: {e}") #Write only if error
 
-
-
-
         except Exception as e:
             st.error(f"An error occurred: {e}")
 

@@ -862,7 +869,6 @@ elif app_mode == "Model Training":
             st.write(f"Accuracy: {accuracy:.4f}")
         except Exception as e: #local error
             st.error(f"An error occurred during model evaluation: {e}")
-
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")
 
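A few notes on the change follow.

The hyperparameter grids are built from paired sliders (for example key="max_depth1" and key="max_depth2") whose two values feed range(low, high + 1); if the second slider is dragged below the first, range() is empty and GridSearchCV receives an empty candidate list. st.slider also accepts a tuple default, which renders a single range slider returning a (low, high) pair. A minimal sketch of that alternative, keeping the labels and bounds from the diff (the range_param helper is hypothetical, not part of the app):

import streamlit as st

def range_param(label, lo, hi, default, key, help_text=None):
    # A tuple default makes st.slider render one range slider and
    # return a (low, high) pair instead of a single value.
    low, high = st.slider(label, lo, hi, default, key=key, help=help_text)
    return list(range(low, high + 1))

# Possible usage in the Random Forest branch:
# param_grid = {
#     'n_estimators': range_param("Number of Estimators", 10, 200, (50, 100), key="n_estimators"),
#     'max_depth': range_param("Max Depth", 3, 20, (5, 10), key="max_depth"),
# }

This uses one widget per parameter, and the empty-range case cannot occur because the widget keeps low <= high.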
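In the new Decision Tree branch, GridSearchCV expects each param_grid value to be a list of candidates, but st.selectbox returns a single string, so 'criterion': st.selectbox(...) would make the search iterate over the characters of "gini" and fail. Wrapping the selection in a list should address this:

# GridSearchCV tries each element of the value, so the scalar
# selectbox result needs to be a one-element list.
'criterion': [st.selectbox("Criterion", ["gini", "entropy"], help="Splitting criterion")],

Also, "gini" and "entropy" are classifier criteria; DecisionTreeRegressor expects values such as "squared_error", so the regression path would need different options.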
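On the guard change in the Random Forest branches: because the new code initialises param_grid = {} before the model-specific blocks, 'param_grid' in locals() is now always true and only the added truthiness test does real work. A minimal sketch of the regression path under that reading (X_train_selected, y_train, param_grid, and st are assumed from the surrounding app):

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

model = RandomForestRegressor(random_state=42)
if param_grid:  # an empty dict is falsy, so this alone covers "no grid built"
    grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error')
    grid_search.fit(X_train_selected, y_train)
    model = grid_search.best_estimator_  # best model, refit on the training split
    st.write("Best Parameters:", grid_search.best_params_)
else:
    model.fit(X_train_selected, y_train)  # plain fit when no grid was defined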
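Finally, on the learning-curve section: with scoring='neg_mean_squared_error', learning_curve returns negated errors, so in the regression case the plotted means are negative. If positive error curves are preferred, the sign can be flipped before plotting; a sketch along those lines (variable names follow the diff, and the sign flip is a suggestion rather than part of the commit):

import numpy as np
from sklearn.model_selection import learning_curve

train_sizes, train_scores, valid_scores = learning_curve(
    model, X_train_selected, y_train, cv=5,
    scoring='neg_mean_squared_error', n_jobs=-1)

# Negate so the y-axis shows MSE decreasing as training size grows.
train_mean = -np.mean(train_scores, axis=1)
valid_mean = -np.mean(valid_scores, axis=1)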