Update app.py

app.py CHANGED
@@ -603,7 +603,7 @@ elif app_mode == "Model Training":
     # Model Selection
     model_name = st.selectbox("Select Model", [
         "Linear Regression", "Logistic Regression", "Decision Tree",
-        "Random Forest", "Gradient Boosting", "SVM"
+        "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN"  # Expanded models
     ], help="Choose a model.")
 
     feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
@@ -697,34 +697,107 @@ elif app_mode == "Model Training":
             st.write("Best Parameters:", grid_search.best_params_)
         else:
             model.fit(X_train_selected, y_train)  # fit without grid search if param_grid is not defined
-
     elif model_name == "Gradient Boosting":
         from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier  # moved import here to avoid bloat
         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
     elif model_name == "SVM":
         model = SVR() if problem_type == "Regression" else SVC()
+    elif model_name == "Naive Bayes":
+        from sklearn.naive_bayes import GaussianNB
+        model = GaussianNB()
+    elif model_name == "KNN":
+        from sklearn.neighbors import KNeighborsClassifier
+        model = KNeighborsClassifier()
+
+    # Cost-Sensitive Classification
+    cost_matrix = None
+    if problem_type == "Classification":
+        cost_sensitive = st.checkbox("Enable Cost-Sensitive Classification")  # new
+
+        if cost_sensitive:
+            # Get class labels
+            classes = np.unique(y_train)
+
+            # Create a matrix, with the default cost being 1
+            cost_matrix = np.ones((len(classes), len(classes)))
+
+            # Cost of correct predictions is 0
+            np.fill_diagonal(cost_matrix, 0)
+
+            # Allow for individual weight specification
+            st.write("Define misclassification costs:")
+
+            for i in range(len(classes)):
+                for j in range(len(classes)):
+                    if i != j:
+                        cost_matrix[i, j] = st.number_input(f"Cost of classifying {classes[i]} as {classes[j]}", value=1.0, min_value=0.0)
+
 
-    #
-
-
-
+    # Threshold adjustment options; only shown for logistic regression
+
+    if model_name == "Logistic Regression" and problem_type == "Classification":
+        threshold = st.slider("Select Threshold", 0.0, 1.0, 0.5, 0.01, help="Adjust the classification threshold")
+        model = LogisticRegression(max_iter=1000)
+
+        model.fit(X_train_selected, y_train)  # Fit model
+
+        # Adjust predictions according to the threshold and store them in new variables
+        y_pred_prob = model.predict_proba(X_test_selected)[:, 1]
+        y_pred = (y_pred_prob > threshold).astype(int)
+
+
+    else:
+        # Cross-validation
+        cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error')  # example, adjust cv
+        st.write(f"Cross-validation scores: {cv_scores}")
+        st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
+
+        model.fit(X_train_selected, y_train)
+
+        # Store model and preprocessor
+        st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
+        st.session_state.preprocessor = preprocessor
+
+        y_pred = model.predict(X_test_selected)
 
-    model.fit(X_train_selected, y_train)
 
-    # Store model and preprocessor
-    st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
-    st.session_state.preprocessor = preprocessor
 
     # Model Evaluation
-
+
     if problem_type == "Regression":
         mse = mean_squared_error(y_test, y_pred)
         r2 = r2_score(y_test, y_pred)
         st.write(f"Mean Squared Error: {mse:.4f}")
         st.write(f"R-squared: {r2:.4f}")
     else:
+        from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report  # Import here to avoid library bloat
+
+        # Weighted averaging for multiclass metrics
+        average_method = "weighted"  # changed from None
+
         accuracy = accuracy_score(y_test, y_pred)
+        precision = precision_score(y_test, y_pred, average=average_method, zero_division=0)
+        recall = recall_score(y_test, y_pred, average=average_method, zero_division=0)
+        f1 = f1_score(y_test, y_pred, average=average_method, zero_division=0)
         st.write(f"Accuracy: {accuracy:.4f}")
+        st.write(f"Precision: {precision:.4f}")
+        st.write(f"Recall: {recall:.4f}")
+        st.write(f"F1 Score: {f1:.4f}")
+        st.write("Classification Report:")
+        st.text(classification_report(y_test, y_pred, zero_division=0))
+
+
+        # Confusion Matrix
+
+        conf_matrix = confusion_matrix(y_test, y_pred)
+
+        # Heatmap
+        fig_conf, ax_conf = plt.subplots()
+        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
+        ax_conf.set_xlabel('Predicted Labels')
+        ax_conf.set_ylabel('True Labels')
+        ax_conf.set_title('Confusion Matrix')
+        st.pyplot(fig_conf)
 
     st.success("Model trained successfully!")
 
@@ -751,7 +824,7 @@ elif app_mode == "Model Training":
         st.error(f"Error loading model: {e}")
 
     # Model Evaluation Section
-    if 'X_test' in locals() and st.session_state.model is not None:
+    if 'X_test' in locals() and st.session_state.model is not None and problem_type == "Regression":
        y_pred = st.session_state.model.predict(X_test)
 
        if problem_type == "Regression":
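The cost-sensitive block in the hunk above collects a cost_matrix from the user, but the training path that follows never applies it. Below is a minimal sketch, on toy data, of one common heuristic for wiring such costs into training: weight each sample by the total cost of misclassifying its true class, then pass that to fit(sample_weight=...). This assumes an estimator whose fit accepts sample_weight (LogisticRegression does; KNeighborsClassifier, for example, does not), and the cost values are invented for illustration.

import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Toy data standing in for the app's X_train_selected / y_train.
X, y = make_classification(n_samples=300, n_features=10, n_informative=6,
                           n_classes=3, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

classes = np.unique(y_train)

# Same shape the app builds: cost_matrix[i, j] is the cost of
# predicting class j when the true class is i.
cost_matrix = np.ones((len(classes), len(classes)))
np.fill_diagonal(cost_matrix, 0)
cost_matrix[2, :2] = 5.0  # invented: missing class 2 is five times as costly

# Heuristic: weight each sample by the total cost of getting its
# true class wrong, so expensive classes dominate the fit.
class_weight = cost_matrix.sum(axis=1)
sample_weight = class_weight[np.searchsorted(classes, y_train)]

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train, sample_weight=sample_weight)
print(f"Test accuracy with cost-derived weights: {model.score(X_test, y_test):.3f}")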
|
736 |
+
#Threshold adjustment options, only shows up for log regression
|
737 |
+
|
738 |
+
if model_name == "Logistic Regression" and problem_type == "Classification":
|
739 |
+
threshold = st.slider("Select Threshold", 0.0, 1.0, 0.5, 0.01, help="Adjust the classification threshold")
|
740 |
+
model = LogisticRegression(max_iter=1000)
|
741 |
+
|
742 |
+
model.fit(X_train_selected, y_train) #Fit model
|
743 |
+
|
744 |
+
#Adjust predictions according to threshold and make new variables
|
745 |
+
y_pred_prob = model.predict_proba(X_test_selected)[:,1]
|
746 |
+
y_pred = (y_pred_prob > threshold).astype(int)
|
747 |
+
|
748 |
+
|
749 |
+
else:
|
750 |
+
# Cross-validation
|
751 |
+
cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error') #example, adjust cv
|
752 |
+
st.write(f"Cross-validation scores: {cv_scores}")
|
753 |
+
st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
|
754 |
+
|
755 |
+
model.fit(X_train_selected, y_train)
|
756 |
+
|
757 |
+
# Store model and preprocessor
|
758 |
+
st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
|
759 |
+
st.session_state.preprocessor = preprocessor
|
760 |
+
|
761 |
+
y_pred = model.predict(X_test_selected)
|
762 |
|
|
|
763 |
|
|
|
|
|
|
|
764 |
|
765 |
# Model Evaluation
|
766 |
+
|
767 |
if problem_type == "Regression":
|
768 |
mse = mean_squared_error(y_test, y_pred)
|
769 |
r2 = r2_score(y_test, y_pred)
|
770 |
st.write(f"Mean Squared Error: {mse:.4f}")
|
771 |
st.write(f"R-squared: {r2:.4f}")
|
772 |
else:
|
773 |
+
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
|
774 |
+
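One subtlety in the cross-validation lines above: scikit-learn scorers follow a higher-is-better convention, so for regression 'neg_mean_squared_error' returns negated errors, and the "Mean cross-validation score" the app prints will read as a negative number. A minimal sketch, on toy regression data, of flipping the sign before reporting:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=0)

# 'neg_mean_squared_error' is negated MSE, so every score comes back <= 0.
scores = cross_val_score(LinearRegression(), X, y, cv=5,
                         scoring="neg_mean_squared_error")
mse_per_fold = -scores  # negate to recover a plain, positive MSE
print(f"MSE per fold: {np.round(mse_per_fold, 2)}")
print(f"Mean MSE: {mse_per_fold.mean():.2f}")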
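The evaluation hunk sets average='weighted' for the multiclass precision, recall, and F1. A small example (invented labels, with one rare class, which is exactly where the averaging choice matters) contrasting it with 'macro' averaging and showing the per-class breakdown the app prints via st.text:

from sklearn.metrics import classification_report, f1_score, precision_score, recall_score

# Invented multiclass labels; class 2 is rare.
y_true = [0, 0, 0, 1, 1, 1, 1, 2, 2, 0]
y_pred = [0, 0, 1, 1, 1, 1, 0, 2, 0, 0]

# 'weighted' averages per-class scores by class support, so frequent classes
# dominate; 'macro' gives every class equal weight regardless of size.
for avg in ("weighted", "macro"):
    p = precision_score(y_true, y_pred, average=avg, zero_division=0)
    r = recall_score(y_true, y_pred, average=avg, zero_division=0)
    f = f1_score(y_true, y_pred, average=avg, zero_division=0)
    print(f"{avg:>8}: precision={p:.3f}  recall={r:.3f}  f1={f:.3f}")

# Per-class report, as shown in the app:
print(classification_report(y_true, y_pred, zero_division=0))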