Spaces:

CosmickVisions
/

Data-Vision

Sleeping

App Files Files Community

CosmickVisions committed on Feb 28

Commit

339e41b

verified ·

1 Parent(s): 531d1ee

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -116

app.py CHANGED Viewed

@@ -670,13 +670,18 @@ elif app_mode == "Model Training":
                     # Model Training and Hyperparameter Tuning
                     if model_name == "Linear Regression":
                         model = LinearRegression()
                     elif model_name == "Logistic Regression":
                         model = LogisticRegression(max_iter=1000)
                     elif model_name == "Decision Tree":
                         if problem_type == "Regression":
                             model = DecisionTreeRegressor()
                         else:
                             model = DecisionTreeClassifier()
                     elif model_name == "Random Forest":
                         if problem_type == "Regression":
                             model = RandomForestRegressor(random_state=42)
@@ -700,70 +705,29 @@ elif app_mode == "Model Training":
                     elif model_name == "Gradient Boosting":
                         from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
                         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
                     elif model_name == "SVM":
                         model = SVR() if problem_type == "Regression" else SVC()
                     elif model_name == "Naive Bayes":
                          from sklearn.naive_bayes import GaussianNB
                          model = GaussianNB()
                     elif model_name == "KNN":
                          from sklearn.neighbors import KNeighborsClassifier
                          model = KNeighborsClassifier()
-                    # Cost-Sensitive Classification
-                    cost_matrix = None
-                    if problem_type == "Classification":
-                        cost_sensitive = st.checkbox("Enable Cost-Sensitive Classification") #new
-                        if cost_sensitive:
-                            #Get class labels
-                            classes = np.unique(y_train)
-                            #Create a matrix, with default cost being 1
-                            cost_matrix = np.ones((len(classes),len(classes)))
-                            #Cost of correct predictions are 0
-                            np.fill_diagonal(cost_matrix, 0)
-                            #Allow for individual weight specification
-                            st.write("Define misclassification costs:")
-                            for i in range(len(classes)):
-                                for j in range(len(classes)):
-                                    if i != j:
-                                        cost_matrix[i,j] = st.number_input(f"Cost of classifying {classes[i]} as {classes[j]}", value=1.0, min_value=0.0)
-                    #Threshold adjustment options, only shows up for log regression
-                    if model_name == "Logistic Regression" and problem_type == "Classification":
-                        threshold = st.slider("Select Threshold", 0.0, 1.0, 0.5, 0.01, help="Adjust the classification threshold")
-                        model = LogisticRegression(max_iter=1000)
-                        model.fit(X_train_selected, y_train) #Fit model
-                        #Adjust predictions according to threshold and make new variables
-                        y_pred_prob = model.predict_proba(X_test_selected)[:,1]
-                        y_pred = (y_pred_prob > threshold).astype(int)
-                    else:
-                        # Cross-validation
-                        cv_scores = cross_val_score(model, X_train_selected, y_train, cv=5, scoring='accuracy' if problem_type == "Classification" else 'neg_mean_squared_error') #example, adjust cv
-                        st.write(f"Cross-validation scores: {cv_scores}")
-                        st.write(f"Mean cross-validation score: {cv_scores.mean():.4f}")
-                        model.fit(X_train_selected, y_train)
-                         # Store model and preprocessor
-                        st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
-                        st.session_state.preprocessor = preprocessor
-                        y_pred = model.predict(X_test_selected)
                     # Model Evaluation
                     if problem_type == "Regression":
                         mse = mean_squared_error(y_test, y_pred)
                         r2 = r2_score(y_test, y_pred)
@@ -799,10 +763,6 @@ elif app_mode == "Model Training":
                         ax_conf.set_title('Confusion Matrix')
                         st.pyplot(fig_conf)
-                    st.success("Model trained successfully!")
-                except Exception as e:
-                    st.error(f"An error occurred: {e}")
     else:
         st.write("Please upload and clean data first.")
@@ -833,6 +793,11 @@ elif app_mode == "Model Training":
                 st.write(f"Mean Squared Error: {mse:.4f}")
                 st.write(f"R-squared: {r2:.4f}")
             else:
                 accuracy = accuracy_score(y_test, y_pred)
                 st.write(f"Accuracy: {accuracy:.4f}")
@@ -906,67 +871,7 @@ elif app_mode == "Predictions":
     else:
         st.write("Please train a model first in the 'Model Training' section.")
-elif app_mode == "Predictions":
-    st.title("🔮 Make Predictions")
-    if st.session_state.model is not None and st.session_state.cleaned_data is not None:
-        df = st.session_state.cleaned_data.copy()
-        # Input data for prediction
-        st.subheader("Enter Data for Prediction")
-        input_data = {}
-        model_columns = st.session_state.model.steps[0][1].transformers_[0][2] + st.session_state.model.steps[0][1].transformers_[1][2]
-        if not set(model_columns).issubset(set(df.drop(columns=[st.session_state.model.steps[-1][0]]).columns)):
-            st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
-            st.stop()
-        for col in model_columns:
-            if pd.api.types.is_numeric_dtype(df[col]):
-                input_data[col] = st.number_input(f"Enter {col}", value=df[col].mean())
-            else:
-                input_data[col] = st.selectbox(f"Select {col}", df[col].unique())
-        # Prediction Button
-        if st.button("Make Prediction"):
-            try:
-                input_df = pd.DataFrame([input_data])
-                prediction = st.session_state.model.predict(input_df)[0]
-                st.subheader("Prediction Result")
-                st.write(f"The predicted value is: {prediction}")
-                # Additional Feedback (Example for Classification)
-                if isinstance(st.session_state.model.steps[-1][1], LogisticRegression):
-                    probabilities = st.session_state.model.predict_proba(input_df)[0]
-                    st.write("Predicted Probabilities:")
-                    st.write(probabilities)
-            except Exception as e:
-                st.error(f"An error occurred during prediction: {e}")
-        #Add batch prediction section in prediction tab
-        st.subheader("Batch Predictions")
-        batch_file = st.file_uploader("Upload CSV for Batch Predictions", type=["csv"])
-        if batch_file is not None:
-            try:
-                batch_df = pd.read_csv(batch_file)
-                # Preprocess the batch data
-                batch_processed = st.session_state.preprocessor.transform(batch_df)
-                # Make predictions
-                batch_predictions = st.session_state.model.predict(batch_processed)
-                batch_df['Prediction'] = batch_predictions
-                st.dataframe(batch_df)
-             # Download predictions
-                csv = batch_df.to_csv(index=False)
-                b64 = base64.b64encode(csv.encode()).decode()  # some strings
-                href = f'<a href="data:file/csv;base64,{b64}" download="predictions.csv">Download Predictions CSV</a>'
-                st.markdown(href, unsafe_allow_html=True)
-            except Exception as e:
-                st.error(f"Error processing batch file: {e}")
-    else:
-        st.write("Please train a model first in the 'Model Training' section.")
 elif app_mode == "Visualization Lab":
     st.title("🔬 Advanced Data Visualization and Clustering Lab")

                     # Model Training and Hyperparameter Tuning
                     if model_name == "Linear Regression":
                         model = LinearRegression()
+                        model.fit(X_train_selected, y_train)
                     elif model_name == "Logistic Regression":
                         model = LogisticRegression(max_iter=1000)
+                        model.fit(X_train_selected, y_train)
                     elif model_name == "Decision Tree":
                         if problem_type == "Regression":
                             model = DecisionTreeRegressor()
+                            model.fit(X_train_selected, y_train)
                         else:
                             model = DecisionTreeClassifier()
+                            model.fit(X_train_selected, y_train)
                     elif model_name == "Random Forest":
                         if problem_type == "Regression":
                             model = RandomForestRegressor(random_state=42)
                     elif model_name == "Gradient Boosting":
                         from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
                         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
+                        model.fit(X_train_selected, y_train)
                     elif model_name == "SVM":
                         model = SVR() if problem_type == "Regression" else SVC()
+                        model.fit(X_train_selected, y_train)
                     elif model_name == "Naive Bayes":
                          from sklearn.naive_bayes import GaussianNB
                          model = GaussianNB()
+                         model.fit(X_train_selected, y_train)
                     elif model_name == "KNN":
                          from sklearn.neighbors import KNeighborsClassifier
                          model = KNeighborsClassifier()
+                         model.fit(X_train_selected, y_train)
+                    # Store model and preprocessor
+                    st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
+                    st.session_state.preprocessor = preprocessor
+                     # Store model and preprocessor
+                    st.session_state.model = Pipeline(steps=[('preprocessor', preprocessor), ('model', model)])
+                    st.session_state.preprocessor = preprocessor
                     # Model Evaluation
+                    y_pred = model.predict(X_test_selected)
                     if problem_type == "Regression":
                         mse = mean_squared_error(y_test, y_pred)
                         r2 = r2_score(y_test, y_pred)
                         ax_conf.set_title('Confusion Matrix')
                         st.pyplot(fig_conf)
     else:
         st.write("Please upload and clean data first.")
                 st.write(f"Mean Squared Error: {mse:.4f}")
                 st.write(f"R-squared: {r2:.4f}")
             else:
+                from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve, classification_report #Import here to avoid library bloat
+                #Weighted averaging for metrics for multiclass
+                average_method = "weighted" #changed from None
                 accuracy = accuracy_score(y_test, y_pred)
                 st.write(f"Accuracy: {accuracy:.4f}")
     else:
         st.write("Please train a model first in the 'Model Training' section.")
 elif app_mode == "Visualization Lab":
     st.title("🔬 Advanced Data Visualization and Clustering Lab")