Spaces:

CosmickVisions
/

Data-Vision

Sleeping

App Files Files Community

CosmickVisions committed on Feb 28

Commit

9e30598

verified ·

1 Parent(s): c7e14fb

Update app.py

Browse files

Files changed (1) hide show

app.py +78 -7

app.py CHANGED Viewed

@@ -353,13 +353,14 @@ elif app_mode == "Smart Cleaning":
                         current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
                     elif clean_action == "Remove Columns":
-                        current_df = current_df.drop(columns=remove_cols)  # Drop selected columns
                     st.session_state.cleaned_data = current_df
                     st.success("Transformation applied!")
             if st.button("Refresh Data Preview"):  # Button to refresh data preview
-                st.experimental_rerun()
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Analysis")
@@ -611,11 +612,10 @@ elif app_mode == "Model Training":
             min_features = 1 # Ensure at least one feature is used
             max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
             param_grid = {
-                'n_estimators': st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),
-                'max_depth': st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),
-                'min_samples_split': st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"), #New hyperparameter
-                'min_samples_leaf': st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"), #New hyperparameter
             }
         # Train-Test Split
@@ -699,6 +699,7 @@ elif app_mode == "Model Training":
                                 model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
                     elif model_name == "Gradient Boosting":
                         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
                     elif model_name == "SVM":
                         model = SVR() if problem_type == "Regression" else SVC()
@@ -761,6 +762,76 @@ elif app_mode == "Model Training":
             else:
                 accuracy = accuracy_score(y_test, y_pred)
                 st.write(f"Accuracy: {accuracy:.4f}")
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")

                         current_df[text_column] = current_df[text_column].astype(str).apply(lambda x: clean_text(x, cleaning_operation, chars_to_remove))
                     elif clean_action == "Remove Columns":
+                        if remove_cols: #Check that it is not empty
+                            current_df = current_df.drop(columns=remove_cols)  # Drop selected columns
                     st.session_state.cleaned_data = current_df
                     st.success("Transformation applied!")
             if st.button("Refresh Data Preview"):  # Button to refresh data preview
+                st.rerun()
 elif app_mode == "Advanced EDA":
     st.title("🔍 Advanced Exploratory Analysis")
             min_features = 1 # Ensure at least one feature is used
             max_features = len(feature_columns) if len(feature_columns) > 0 else 1 # Use 1 if no features are selected
             param_grid = {
+                'n_estimators': list(range(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest."),(st.slider("Number of Estimators", 10, 200, 100, help="Number of trees in the forest.")+1))),
+                'max_depth': list(range(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree."),(st.slider("Max Depth", 3, 20, 10, help="Maximum depth of the tree.")+1))),
+                'min_samples_split': list(range(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node"),(st.slider("Minimum Samples Split", 2, 10, 2, help="Minimum samples required to split an internal node")+1))), #New hyperparameter
+                'min_samples_leaf': list(range(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node"),(st.slider("Minimum Samples Leaf", 1, 10, 1, help="Minimum samples required to be at a leaf node")+1))), #New hyperparameter
             }
         # Train-Test Split
                                 model.fit(X_train_selected, y_train) # fit without gridsearch if param_grid is not defined
                     elif model_name == "Gradient Boosting":
+                        from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier #moved import here to avoid bloat
                         model = GradientBoostingRegressor() if problem_type == "Regression" else GradientBoostingClassifier()
                     elif model_name == "SVM":
                         model = SVR() if problem_type == "Regression" else SVC()
             else:
                 accuracy = accuracy_score(y_test, y_pred)
                 st.write(f"Accuracy: {accuracy:.4f}")
+elif app_mode == "Predictions":
+    # Predictions page: a single-row prediction built from widget inputs, plus
+    # batch predictions for an uploaded CSV. Both use the trained model held in
+    # st.session_state.model and the columns of st.session_state.cleaned_data.
+    st.title("🔮 Make Predictions")
+    if st.session_state.model is not None and st.session_state.cleaned_data is not None:
+        df = st.session_state.cleaned_data.copy()
+        # Input data for prediction
+        st.subheader("Enter Data for Prediction")
+        input_data = {}
+        try:
+            # The model's first step is read as a fitted column transformer whose
+            # `transformers_` entries end with their column list: entry 0 is
+            # taken as the numeric columns, entry 1 as the categorical columns.
+            fitted_transformers = st.session_state.model.steps[0][1].transformers_
+            numeric_transformer_columns = fitted_transformers[0][2] if hasattr(fitted_transformers[0][2], '__len__') else []
+            categorical_transformer_columns = fitted_transformers[1][2] if hasattr(fitted_transformers[1][2], '__len__') else []
+            # list() keeps `+` a concatenation even when a column spec is an
+            # array or Index rather than a plain list.
+            model_columns = list(numeric_transformer_columns) + list(categorical_transformer_columns)
+        except (AttributeError, IndexError, TypeError) as e:
+            # IndexError/TypeError cover a model with fewer than two transformers
+            # or with steps that cannot be indexed as assumed above.
+            st.error(f"Error accessing model transformers: {e}. Please ensure a valid model is trained and loaded.")
+            st.stop()
+        # Every column the model was trained on must exist in the current data.
+        if not set(model_columns).issubset(set(df.columns)):
+            st.error("The model was trained on a dataframe that contains different columns than the currently uploaded dataframe. Please upload the correct dataframe.")
+            st.stop()
+        for col in model_columns:
+            if pd.api.types.is_numeric_dtype(df[col]):
+                # Default to the column mean; an empty or all-NaN column has a
+                # NaN mean, which is not a meaningful default, so use 0.0 then.
+                col_mean = df[col].mean()
+                default_value = 0.0 if pd.isna(col_mean) else float(col_mean)
+                input_data[col] = st.number_input(f"Enter {col}", value=default_value)
+            else:
+                input_data[col] = st.selectbox(f"Select {col}", df[col].unique())
+        # Prediction Button
+        if st.button("Make Prediction"):
+            try:
+                input_df = pd.DataFrame([input_data])
+                prediction = st.session_state.model.predict(input_df)[0]
+                st.subheader("Prediction Result")
+                st.write(f"The predicted value is: {prediction}")
+                # Show class probabilities for any model that can provide them,
+                # not only LogisticRegression.
+                if hasattr(st.session_state.model, "predict_proba"):
+                    probabilities = st.session_state.model.predict_proba(input_df)[0]
+                    st.write("Predicted Probabilities:")
+                    st.write(probabilities)
+            except Exception as e:
+                st.error(f"An error occurred during prediction: {e}")
+        # Batch prediction section
+        st.subheader("Batch Predictions")
+        batch_file = st.file_uploader("Upload CSV for Batch Predictions", type=["csv"])
+        if batch_file is not None:
+            try:
+                batch_df = pd.read_csv(batch_file)
+                missing_columns = [col for col in model_columns if col not in batch_df.columns]
+                if missing_columns:
+                    st.error(f"Error processing batch file: missing required columns {missing_columns}")
+                else:
+                    # The model's first step already does the preprocessing (see
+                    # the transformer lookup above), so it is given the raw
+                    # feature columns, exactly like the single-row path.
+                    # Transforming the data beforehand would preprocess it twice.
+                    batch_predictions = st.session_state.model.predict(batch_df[model_columns])
+                    batch_df['Prediction'] = batch_predictions
+                    st.dataframe(batch_df)
+                    # Offer the predictions as a CSV download.
+                    csv = batch_df.to_csv(index=False)
+                    st.download_button("Download Predictions CSV", data=csv, file_name="predictions.csv", mime="text/csv")
+            except Exception as e:
+                st.error(f"Error processing batch file: {e}")
+    else:
+        st.write("Please train a model first in the 'Model Training' section.")
 elif app_mode == "Predictions":
     st.title("🔮 Make Predictions")