Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Community

CosmickVisions committed on Feb 28

Commit

cff9e1f

verified ·

1 Parent(s): efd2599

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -26

app.py CHANGED Viewed

@@ -722,10 +722,6 @@ elif app_mode == "Model Training":
                     # Store model and preprocessor
                     st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
-                    st.session_state.preprocessor = preprocessor
-                     # Store model and preprocessor
-                    st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
                     st.session_state.preprocessor = preprocessor
                     # Model Evaluation
@@ -764,7 +760,7 @@ elif app_mode == "Model Training":
                         #Heatmap
                         fig_conf, ax_conf = plt.subplots()
-                        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax_conf)
                         ax_conf.set_xlabel('Predicted Labels')
                         ax_conf.set_ylabel('True Labels')
                         ax_conf.set_title('Confusion Matrix')
@@ -774,11 +770,10 @@ elif app_mode == "Model Training":
                     #Added section for model visualization
                     st.subheader("Model Visualization")
-                    if problem_type == "Classification":
-                        try: #All the plotting code here.
                             #Added code that calculates the learning curves
-                            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy')
                             #Then add a plot for the learning curve and use st.pyplot
                             train_mean = np.mean(train_scores, axis=1)
@@ -799,25 +794,61 @@ elif app_mode == "Model Training":
                             ax_lc.set_ylabel('Accuracy')
                             ax_lc.legend(loc='best')
                             st.pyplot(fig_lc)  # Display the figure in Streamlit
-                            #Feature Importance (Tree-based Models)
-                            if model_name in ["Random Forest", "Gradient Boosting"] : #Make sure its the correct type for extraction
-                                importances = model.feature_importances_ # Assumed tree-based model
-                                feat_importances = pd.Series(importances, index=X_train.columns)
-                                feat_importances = feat_importances.nlargest(20)
-                                fig_feat, ax_feat = plt.subplots()
-                                feat_importances.plot(kind='barh', ax=ax_feat)
-                                ax_feat.set_xlabel('Relative Importance')
-                                ax_feat.set_ylabel('Features')
-                                ax_feat.set_title('Feature Importances')
-                                st.pyplot(fig_feat)
-                        except Exception as e: #Local error
-                            st.write(f"Plotting functions requires tree based-models and for classification: {e}")
-                            else:
-                            st.write("Please upload and clean data first.")
        # Model Saving
         model_filename = st.text_input("Enter Model Filename (without extension)", "trained_model")
@@ -839,6 +870,7 @@ elif app_mode == "Model Training":
        #Model Evaluation Section
         if 'X_test' in locals() and st.session_state.model is not None:
             try: #Error catching with new test data
                 y_pred = st.session_state.model.predict(X_test)
                 if problem_type == "Regression":
@@ -850,9 +882,8 @@ elif app_mode == "Model Training":
                      from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
                      accuracy = accuracy_score(y_test, y_pred)
                      st.write(f"Accuracy: {accuracy:.4f}")
             except Exception as e: #local error
-                st.error(f"An error occurred during model evaluation: {e}")
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")

                     # Store model and preprocessor
                     st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
                     st.session_state.preprocessor = preprocessor
                     # Model Evaluation
                         #Heatmap
                         fig_conf, ax_conf = plt.subplots()
+                        sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax_conf)
                         ax_conf.set_xlabel('Predicted Labels')
                         ax_conf.set_ylabel('True Labels')
                         ax_conf.set_title('Confusion Matrix')
                     #Added section for model visualization
                     st.subheader("Model Visualization")
+                    try: #All the plotting code here.
+                        if problem_type == "Classification" and model_name not in ["Linear Regression","Logistic Regression","SVM","Naive Bayes", "KNN"]:
                             #Added code that calculates the learning curves
+                            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='accuracy',n_jobs = -1)
                             #Then add a plot for the learning curve and use st.pyplot
                             train_mean = np.mean(train_scores, axis=1)
                             ax_lc.set_ylabel('Accuracy')
                             ax_lc.legend(loc='best')
                             st.pyplot(fig_lc)  # Display the figure in Streamlit
+                            importances = model.feature_importances_ # Assumed tree-based model
+                            feat_importances = pd.Series(importances, index=X_train.columns)
+                            feat_importances = feat_importances.nlargest(20)
+                            fig_feat, ax_feat = plt.subplots()
+                            feat_importances.plot(kind='barh', ax=ax_feat)
+                            ax_feat.set_xlabel('Relative Importance')
+                            ax_feat.set_ylabel('Features')
+                            ax_feat.set_title('Feature Importances')
+                            st.pyplot(fig_feat)
+                         elif problem_type == "Regression" and model_name not in ["Linear Regression","Logistic Regression","SVM","Naive Bayes", "KNN"]: #graph regressions with regressor based models
+                            train_sizes, train_scores, valid_scores = learning_curve(model, X_train_selected, y_train, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)
+                            #Then add a plot for the learning curve and use st.pyplot
+                            train_mean = np.mean(train_scores, axis=1)
+                            train_std = np.std(train_scores, axis=1)
+                            valid_mean = np.mean(valid_scores, axis=1)
+                            valid_std = np.std(valid_scores, axis=1)
+                            fig_lc, ax_lc = plt.subplots() #plot the curve in matplotlib
+                            ax_lc.plot(train_sizes, train_mean, color='blue', marker='o', markersize=5, label='Training neg_mean_squared_error')
+                            ax_lc.fill_between(train_sizes, train_mean + train_std, train_mean - train_std, alpha=0.15, color='blue')
+                            ax_lc.plot(train_sizes, valid_mean, color='green', linestyle='--', marker='s', markersize=5, label='Validation neg_mean_squared_error')
+                            ax_lc.fill_between(train_sizes, valid_mean + valid_std, valid_mean - valid_std, alpha=0.15, color='green')
+                            ax_lc.set_title('Learning Curves')
+                            ax_lc.set_xlabel('Training Set Size')
+                            ax_lc.set_ylabel('Neg Mean Squared Error')
+                            ax_lc.legend(loc='best')
+                            st.pyplot(fig_lc)  # Display the figure in Streamlit
+                            importances = model.feature_importances_ # Assumed tree-based model
+                            feat_importances = pd.Series(importances, index=X_train.columns)
+                            feat_importances = feat_importances.nlargest(20)
+                            fig_feat, ax_feat = plt.subplots()
+                            feat_importances.plot(kind='barh', ax=ax_feat)
+                            ax_feat.set_xlabel('Relative Importance')
+                            ax_feat.set_ylabel('Features')
+                            ax_feat.set_title('Feature Importances')
+                            st.pyplot(fig_feat)
+                    except Exception as e: #Local error
+                        st.write(f"Plotting functions requires tree based-models and for classification: {e}")
+                except Exception as e:
+                    st.error(f"An error occurred: {e}")
+    else:
+        st.write("Please upload and clean data first.")
        # Model Saving
         model_filename = st.text_input("Enter Model Filename (without extension)", "trained_model")
        #Model Evaluation Section
         if 'X_test' in locals() and st.session_state.model is not None:
             try: #Error catching with new test data
                 y_pred = st.session_state.model.predict(X_test)
                 if problem_type == "Regression":
                      from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
                      accuracy = accuracy_score(y_test, y_pred)
                      st.write(f"Accuracy: {accuracy:.4f}")
             except Exception as e: #local error
+                 st.error(f"An error occurred during model evaluation: {e}")
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")