Spaces:

CosmickVisions
/

Data-Vision

Sleeping

App Files Community

CosmickVisions committed on Feb 28

Commit

c7e14fb

verified ·

1 Parent(s): d0457f4

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -21

app.py CHANGED Viewed

@@ -273,6 +273,7 @@ elif app_mode == "Smart Cleaning":
             clean_action = st.selectbox("Choose Operation", [
                 "Handle Missing Values",
                 "Clean Text",
                 # ... other cleaning operations ...
             ])
@@ -298,7 +299,13 @@ elif app_mode == "Smart Cleaning":
                 if cleaning_operation == "Remove Special Characters":
                     chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
         with col2:
             if st.button("Apply Transformation"):
                 with st.spinner("Applying changes..."):
                     current_df = df.copy()
@@ -330,7 +337,7 @@ elif app_mode == "Smart Cleaning":
                             current_df = current_df.dropna()
                     elif clean_action == "Clean Text":
-                        import re #moved here since its only used here to avoid library bloat
                         def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
                             if operation == "Remove Special Characters":
@@ -345,9 +352,15 @@ elif app_mode == "Smart Cleaning":
                         current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
                     st.session_state.cleaned_data = current_df
                     st.success("Transformation applied!")
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Analysis")
@@ -594,12 +607,15 @@ elif app_mode == "Model Training":
         feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
-        if model_name == "Random Forest":
             param_grid = {
                 'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
                 'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
                 'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
                 'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
             }
         # Train-Test Split
@@ -610,6 +626,12 @@ elif app_mode == "Model Training":
                 try:
                     X = df[feature_columns]
                     y = df[target_column]
                     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
                     # Preprocessing Pipeline
@@ -658,16 +680,23 @@ elif app_mode == "Model Training":
                     elif model_name == "Random Forest":
                         if problem_type == "Regression":
                             model = RandomForestRegressor(random_state=42)
-                            grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error')  # Example scoring
-                            grid_search.fit(X_train_selected, y_train)
-                            model = grid_search.best_estimator_
-                            st.write("Best Parameters:", grid_search.best_params_)
                         else:
                             model = RandomForestClassifier(random_state=42)
-                            grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
-                            grid_search.fit(X_train_selected, y_train)
-                            model = grid_search.best_estimator_
-                            st.write("Best Parameters:", grid_search.best_params_)
                     elif model_name == "Gradient Boosting":
                         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
@@ -721,16 +750,17 @@ elif app_mode == "Model Training":
                 st.error(f"Error loading model: {e}")
        #Model Evaluation Section
-        y_pred = st.session_state.model.predict(X_test)
-        if problem_type == "Regression":
-            mse = mean_squared_error(y_test, y_pred)
-            r2 = r2_score(y_test, y_pred)
-            st.write(f"Mean Squared Error: {mse:.4f}")
-            st.write(f"R-squared: {r2:.4f}")
-        else:
-            accuracy = accuracy_score(y_test, y_pred)
-            st.write(f"Accuracy: {accuracy:.4f}")
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")

             clean_action = st.selectbox("Choose Operation", [
                 "Handle Missing Values",
                 "Clean Text",
+                "Remove Columns",  # New option
                 # ... other cleaning operations ...
             ])
                 if cleaning_operation == "Remove Special Characters":
                     chars_to_remove = st.text_input("Characters to Remove", r'[^a-zA-Z0-9\s]')
+            elif clean_action == "Remove Columns":
+                remove_cols = st.multiselect("Columns to Remove", df.columns)  # Multiselect for column removal
         with col2:
+            st.subheader("Data Preview")  # Added Data Preview Section
+            st.dataframe(df.head(10), use_container_width=True)  # Display sample data
             if st.button("Apply Transformation"):
                 with st.spinner("Applying changes..."):
                     current_df = df.copy()
                             current_df = current_df.dropna()
                     elif clean_action == "Clean Text":
+                        import re  # moved here since its only used here to avoid library bloat
                         def clean_text(text, operation, chars_to_remove=r'[^a-zA-Z0-9\s]'):
                             if operation == "Remove Special Characters":
                         current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
+                    elif clean_action == "Remove Columns":
+                        current_df = current_df.drop(columns=remove_cols)  # Drop selected columns
                     st.session_state.cleaned_data = current_df
                     st.success("Transformation applied!")
+            if st.button("Refresh Data Preview"):  # Button to refresh data preview
+                st.experimental_rerun()
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Analysis")
         feature_selection_method = st.selectbox("Feature Selection Method", ["None", "SelectKBest"])
+        if model_name == "Random Forest" and feature_columns:  # Check if Random Forest and features are selected
+            min_features = 1 # Ensure at least one feature is used
+            max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
             param_grid = {
                 'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
                 'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
                 'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
                 'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
             }
         # Train-Test Split
                 try:
                     X = df[feature_columns]
                     y = df[target_column]
+                    # Check if X is empty
+                    if X.empty:
+                        st.error("No features were selected. Please select feature columns.")
+                        st.stop()
                     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
                     # Preprocessing Pipeline
                     elif model_name == "Random Forest":
                         if problem_type == "Regression":
                             model = RandomForestRegressor(random_state=42)
+                            if 'param_grid' in locals():
+                                grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error')  # Example scoring
+                                grid_search.fit(X_train_selected, y_train)
+                                model = grid_search.best_estimator_
+                                st.write("Best Parameters:", grid_search.best_params_)
+                            else:
+                                model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined.
                         else:
                             model = RandomForestClassifier(random_state=42)
+                            if 'param_grid' in locals():
+                                grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
+                                grid_search.fit(X_train_selected, y_train)
+                                model = grid_search.best_estimator_
+                                st.write("Best Parameters:", grid_search.best_params_)
+                            else:
+                                model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
                     elif model_name == "Gradient Boosting":
                         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
                 st.error(f"Error loading model: {e}")
        #Model Evaluation Section
+        if 'X_test' in locals() and st.session_state.model is not None:
+            y_pred = st.session_state.model.predict(X_test)
+            if problem_type == "Regression":
+                mse = mean_squared_error(y_test, y_pred)
+                r2 = r2_score(y_test, y_pred)
+                st.write(f"Mean Squared Error: {mse:.4f}")
+                st.write(f"R-squared: {r2:.4f}")
+            else:
+                accuracy = accuracy_score(y_test, y_pred)
+                st.write(f"Accuracy: {accuracy:.4f}")
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")