CosmickVisions committed on
Commit
531d1ee
·
verified ·
1 Parent(s): 4b8432e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -12
app.py CHANGED
@@ -603,7 +603,7 @@ elif app_mode == "Model Training":
603
  # Model Selection
604
  model_name = st.selectbox("Select Model", [
605
  "Linear Regression", "Logistic Regression", "Decision Tree",
606
- "Random Forest", "Gradient Boosting", "SVM"
607
  ], help="Choose a model.")
608
 
609
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
@@ -697,34 +697,107 @@ elif app_mode == "Model Training":
697
  st.write("Best Parameters:", grid_search.best_params_)
698
  else:
699
  model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
700
-
701
  elif model_name == "Gradient Boosting":
702
  from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
703
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
704
  elif model_name == "SVM":
705
  model = SVR() if problem_type == "Regression" else SVC()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
706
 
707
- # Cross-validation
708
- cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5) #example, adjust cv
709
- st.write(f"Cross-validation scores: {cv_scores}")
710
- st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
711
 
712
- model.fit(X_train_selected, y_train)
713
 
714
- # Store model and preprocessor
715
- st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
716
- st.session_state.preprocessor = preprocessor
717
 
718
  # Model Evaluation
719
- y_pred = model.predict(X_test_selected)
720
  if problem_type == "Regression":
721
  mse = mean_squared_error(y_test, y_pred)
722
  r2 = r2_score(y_test, y_pred)
723
  st.write(f"Mean Squared Error: {mse:.4f}")
724
  st.write(f"R-squared: {r2:.4f}")
725
  else:
 
 
 
 
 
726
  accuracy = accuracy_score(y_test, y_pred)
 
 
 
727
  st.write(f"Accuracy: {accuracy:.4f}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
 
729
  st.success("Model trained successfully!")
730
 
@@ -751,7 +824,7 @@ elif app_mode == "Model Training":
751
  st.error(f"Error loading model: {e}")
752
 
753
  #Model Evaluation Section
754
- if 'X_test' in locals() and st.session_state.model is not None:
755
  y_pred = st.session_state.model.predict(X_test)
756
 
757
  if problem_type == "Regression":
 
603
  # Model Selection
604
  model_name = st.selectbox("Select Model", [
605
  "Linear Regression", "Logistic Regression", "Decision Tree",
606
+ "Random Forest", "Gradient Boosting", "SVM", "Naive Bayes", "KNN" # Expanded models
607
  ], help="Choose a model.")
608
 
609
  feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
 
697
  st.write("Best Parameters:", grid_search.best_params_)
698
  else:
699
  model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
 
700
  elif model_name == "Gradient Boosting":
701
  from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
702
  model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
703
  elif model_name == "SVM":
704
  model = SVR() if problem_type == "Regression" else SVC()
705
+ elif model_name == "Naive Bayes":
706
+ from sklearn.naive_bayes import GaussianNB
707
+ model = GaussianNB()
708
+ elif model_name == "KNN":
709
+ from sklearn.neighbors import KNeighborsClassifier
710
+ model = KNeighborsClassifier()
711
+
712
+ # Cost-Sensitive Classification
713
+ cost_matrix = None
714
+ if problem_type == "Classification":
715
+ cost_sensitive = st.checkbox("Enable Cost-Sensitive Classification") #new
716
+
717
+ if cost_sensitive:
718
+ #Get class labels
719
+ classes = np.unique(y_train)
720
+
721
+ #Create a matrix, with default cost being 1
722
+ cost_matrix = np.ones((len(classes),len(classes)))
723
+
724
+ #Cost of correct predictions are 0
725
+ np.fill_diagonal(cost_matrix, 0)
726
+
727
+ #Allow for individual weight specification
728
+ st.write("Define misclassification costs:")
729
+
730
+ for i in range(len(classes)):
731
+ for j in range(len(classes)):
732
+ if i != j:
733
+ cost_matrix[i,j] = st.number_input(f"Cost of classifying {classes[i]} as {classes[j]}", value=1.0, min_value=0.0)
734
+
735
 
736
+ #Threshold adjustment options, only shows up for log regression
737
+
738
+ if model_name == "Logistic Regression" and problem_type == "Classification":
739
+ threshold = st.slider("Select Threshold", 0.0, 1.0, 0.5, 0.01, help="Adjust the classification threshold")
740
+ model = LogisticRegression(max_iter=1000)
741
+
742
+ model.fit(X_train_selected, y_train) #Fit model
743
+
744
+ #Adjust predictions according to threshold and make new variables
745
+ y_pred_prob = model.predict_proba(X_test_selected)[:,1]
746
+ y_pred = (y_pred_prob > threshold).astype(int)
747
+
748
+
749
+ else:
750
+ # Cross-validation
751
+ cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error') #example, adjust cv
752
+ st.write(f"Cross-validation scores: {cv_scores}")
753
+ st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
754
+
755
+ model.fit(X_train_selected, y_train)
756
+
757
+ # Store model and preprocessor
758
+ st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
759
+ st.session_state.preprocessor = preprocessor
760
+
761
+ y_pred = model.predict(X_test_selected)
762
 
 
763
 
 
 
 
764
 
765
  # Model Evaluation
766
+
767
  if problem_type == "Regression":
768
  mse = mean_squared_error(y_test, y_pred)
769
  r2 = r2_score(y_test, y_pred)
770
  st.write(f"Mean Squared Error: {mse:.4f}")
771
  st.write(f"R-squared: {r2:.4f}")
772
  else:
773
+ from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
774
+
775
+ #Weighted averaging for metrics for multiclass
776
+ average_method = "weighted" #changed from None
777
+
778
  accuracy = accuracy_score(y_test, y_pred)
779
+ precision = precision_score(y_test, y_pred, average = average_method, zero_division = 0)
780
+ recall = recall_score(y_test, y_pred, average = average_method, zero_division = 0)
781
+ f1 = f1_score(y_test, y_pred, average = average_method, zero_division = 0)
782
  st.write(f"Accuracy: {accuracy:.4f}")
783
+ st.write(f"Precision: {precision:.4f}")
784
+ st.write(f"Recall: {recall:.4f}")
785
+ st.write(f"F1 Score: {f1:.4f}")
786
+ st.write("Classification Report:")
787
+ st.text(classification_report(y_test, y_pred, zero_division = 0))
788
+
789
+
790
+ #Confusion Matrix
791
+
792
+ conf_matrix = confusion_matrix(y_test, y_pred)
793
+
794
+ #Heatmap
795
+ fig_conf, ax_conf = plt.subplots()
796
+ sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
797
+ ax_conf.set_xlabel('Predicted Labels')
798
+ ax_conf.set_ylabel('True Labels')
799
+ ax_conf.set_title('Confusion Matrix')
800
+ st.pyplot(fig_conf)
801
 
802
  st.success("Model trained successfully!")
803
 
 
824
  st.error(f"Error loading model: {e}")
825
 
826
  #Model Evaluation Section
827
+ if 'X_test' in locals() and st.session_state.model is not None and problem_type == "Regression":
828
  y_pred = st.session_state.model.predict(X_test)
829
 
830
  if problem_type == "Regression":