Deploy PyCaret model baseline_dt_20250426_212853.pkl with improved UI
app.py (CHANGED)
@@ -9,7 +9,7 @@ import logging # Added for better debugging in the Space
 
 # --- Page Configuration (MUST BE FIRST STREAMLIT COMMAND) ---
 APP_TITLE = "my-pycaret-app"
-st.set_page_config(page_title=APP_TITLE, layout="centered")
+st.set_page_config(page_title=APP_TITLE, layout="centered", initial_sidebar_state="collapsed")
 
 # Configure simple logging for the Streamlit app
 # Use Streamlit logger if available, otherwise basic config
@@ -25,6 +25,10 @@ except AttributeError: # Fallback for older Streamlit versions or different cont
 # --- Model Configuration ---
 MODEL_FILE = "model.pkl" # Relative path within the Space
 
+# --- Processed Schema (for type checking later) ---
+# Use double braces to embed the schema dict correctly in the generated code
+APP_SCHEMA = {'PassengerId': {'type': 'numerical'}, 'Pclass': {'type': 'numerical'}, 'Name': {'type': 'numerical'}, 'Sex': {'type': 'numerical'}, 'Age': {'type': 'numerical'}, 'SibSp': {'type': 'numerical'}, 'Parch': {'type': 'numerical'}, 'Ticket': {'type': 'numerical'}, 'Fare': {'type': 'numerical'}, 'Cabin': {'type': 'numerical'}, 'Embarked': {'type': 'numerical'}, 'Survived': {'type': 'numerical'}}
+
 
 # --- Load Model ---
 # Use cache_resource for efficient loading
@@ -64,34 +68,35 @@ def get_model():
 model = get_model()
 
 # --- App Layout ---
-st.title(APP_TITLE) # Title now comes after page config
+st.title(APP_TITLE) # Title now comes after page config
 
 if model is None:
     st.error("Model could not be loaded. Please check the application logs in the Space settings for more details. Application cannot proceed.")
 else:
     st.success("Model loaded successfully!") # Indicate success
-    st.
+    st.markdown("Provide the input features below to generate a prediction using the deployed model.")
 
-    # --- Input
+    # --- Input Section ---
+    st.header("Model Inputs")
     with st.form("prediction_form"):
-        st.subheader("Input Features:")
         # Dynamically generated widgets based on schema
         input_PassengerId = st.number_input(label='PassengerId', format='%f', key='input_PassengerId')
-
-
-
-
-
-
-
-
-
-
-
-        submitted = st.form_submit_button("
-
-    # --- Prediction Logic ---
+        input_Pclass = st.number_input(label='Pclass', format='%f', key='input_Pclass')
+        input_Name = st.number_input(label='Name', format='%f', key='input_Name')
+        input_Sex = st.number_input(label='Sex', format='%f', key='input_Sex')
+        input_Age = st.number_input(label='Age', format='%f', key='input_Age')
+        input_SibSp = st.number_input(label='SibSp', format='%f', key='input_SibSp')
+        input_Parch = st.number_input(label='Parch', format='%f', key='input_Parch')
+        input_Ticket = st.number_input(label='Ticket', format='%f', key='input_Ticket')
+        input_Fare = st.number_input(label='Fare', format='%f', key='input_Fare')
+        input_Cabin = st.number_input(label='Cabin', format='%f', key='input_Cabin')
+        input_Embarked = st.number_input(label='Embarked', format='%f', key='input_Embarked')
+        input_Survived = st.number_input(label='Survived', format='%f', key='input_Survived')
+        submitted = st.form_submit_button("📊 Get Prediction")
+
+    # --- Prediction Logic & Output Section ---
     if submitted:
+        st.header("Prediction Output")
         try:
            # Create DataFrame from inputs using original feature names as keys
            # The values are automatically fetched by Streamlit using the keys assigned to widgets
@@ -101,35 +106,49 @@ else:
 
            # Ensure correct dtypes based on schema before prediction
            logger.info("Applying dtypes based on schema...")
-           # Use
-           for feature,
+           # Use APP_SCHEMA defined earlier
+           for feature, details in APP_SCHEMA.items():
+               feature_type = details.get("type", "text").lower()
               if feature in input_data.columns: # Check if feature exists
                   try:
-
+                      current_value = input_data[feature].iloc[0]
+                      # Skip conversion if value is already None or NaN equivalent
+                      if pd.isna(current_value):
+                          continue
+
+                      if feature_type == 'numerical':
                          # Convert to numeric, coercing errors (users might enter text)
                          input_data[feature] = pd.to_numeric(input_data[feature], errors='coerce')
-
-
+                      elif feature_type == 'categorical':
+                          # Ensure categorical inputs are treated as strings by the model if needed
+                          # PyCaret often expects object/string type for categoricals in predict_model
+                          input_data[feature] = input_data[feature].astype(str)
+                      # Add elif for other types if needed (e.g., datetime)
+                      # else: # text
                       # input_data[feature] = input_data[feature].astype(str) # Ensure string type
+
                   except Exception as type_e:
-                      logger.warning(f"Could not convert feature '{feature}' to type '{
+                      logger.warning(f"Could not convert feature '{feature}' (value: {current_value}) to type '{feature_type}'. Error: {type_e}")
                       # Decide how to handle type conversion errors, e.g., set to NaN or keep original
                       input_data[feature] = pd.NA # Set to missing if conversion fails
-
               else:
                   logger.warning(f"Feature '{feature}' from schema not found in input form data.")
 
 
            # Handle potential NaN values from coercion or failed conversion
            if input_data.isnull().values.any():
-               st.warning("Some inputs might be invalid or missing. Attempting to handle missing values (e.g., replacing with 0).")
-               logger.warning(f"NaN values found in input data after type conversion/validation. Filling with 0. Data before fill:\n{input_data}")
+               st.warning("Some inputs might be invalid or missing. Attempting to handle missing values (e.g., replacing with 0 for numerical). Check logs for details.")
+               logger.warning(f"NaN values found in input data after type conversion/validation. Filling numerical with 0. Data before fill:\n{input_data}")
                # More robust imputation might be needed depending on the model
-
-
+               # Fill only numerical NaNs with 0, leave others? Or use mode for categoricals?
+               for feature, details in APP_SCHEMA.items():
+                   if details.get("type") == "numerical" and input_data[feature].isnull().any():
+                       input_data[feature].fillna(0, inplace=True)
+               # input_data.fillna(0, inplace=True) # Previous simpler strategy
+               logger.info(f"Data after filling NaN:\n{input_data}")
 
 
-           st.
+           st.markdown("##### Input Data Sent to Model (after processing):")
            st.dataframe(input_data)
 
            # Make prediction
@@ -141,7 +160,7 @@ else:
            predictions = predict_model(model, data=input_data)
            logger.info("Prediction successful.")
 
-           st.
+           st.markdown("##### Prediction Result:")
            logger.info(f"Prediction output columns: {predictions.columns.tolist()}")
 
            # Display relevant prediction columns (adjust based on PyCaret task)
@@ -151,21 +170,30 @@ else:
 
            if pred_col_label in predictions.columns:
                st.success(f"Predicted Label: **{predictions[pred_col_label].iloc[0]}**")
-
-
+               # Also show score if available for classification
+               if pred_col_score in predictions.columns and pycaret_task_module == 'pycaret.classification':
+                   st.info(f"Prediction Score: **{predictions[pred_col_score].iloc[0]:.4f}**")
+               # Handle regression output (usually just score)
+               elif pred_col_score in predictions.columns and pycaret_task_module == 'pycaret.regression':
+                   st.success(f"Predicted Value: **{predictions[pred_col_score].iloc[0]:.4f}**")
            else:
                # Fallback: Display the last column as prediction if specific ones aren't found
                try:
-
-
-
+                   # Exclude input columns if they are present in the output df
+                   output_columns = [col for col in predictions.columns if col not in input_data.columns]
+                   if output_columns:
+                       last_col_name = output_columns[-1]
+                       st.info(f"Prediction Output (Column: '{last_col_name}'): **{predictions[last_col_name].iloc[0]}**")
+                       logger.warning(f"Could not find standard prediction columns. Displaying last non-input column: '{last_col_name}'")
+                   else: # If only input columns are returned (unlikely)
+                       st.warning("Prediction output seems to only contain input columns.")
                except IndexError:
-                   st.error("Prediction result DataFrame is empty.")
-                   logger.error("Prediction result DataFrame is empty.")
+                   st.error("Prediction result DataFrame is empty or has unexpected format.")
+                   logger.error("Prediction result DataFrame is empty or has unexpected format.")
 
 
            # Show full prediction output optionally
-           with st.expander("
+           with st.expander("View Full Prediction Output DataFrame"):
                st.dataframe(predictions)
 
        except Exception as e:
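For context, the prediction path the form drives in the Space can also be exercised directly in a Python session. The sketch below is a minimal, hypothetical example (not part of the committed app.py): it loads the same model.pkl artifact with PyCaret and sends a one-row DataFrame through predict_model, mirroring the numeric coercion the app performs. The sample feature values are made up, and the names of the prediction columns depend on the PyCaret version (prediction_label / prediction_score in 3.x, Label / Score in 2.x).

# Minimal local sketch, assuming PyCaret is installed and the Space's model.pkl is in the working directory.
import pandas as pd
from pycaret.classification import load_model, predict_model

# load_model appends ".pkl", so pass the artifact name without the extension.
model = load_model("model")

# One-row frame with the same columns the form collects; values are illustrative only.
row = pd.DataFrame([{
    "PassengerId": 1, "Pclass": 3, "Name": 0, "Sex": 0, "Age": 22.0,
    "SibSp": 1, "Parch": 0, "Ticket": 0, "Fare": 7.25, "Cabin": 0,
    "Embarked": 0, "Survived": 0,
}])

# Coerce everything to numeric and fill gaps with 0, matching the app's schema handling.
row = row.apply(pd.to_numeric, errors="coerce").fillna(0)

predictions = predict_model(model, data=row)
print(predictions.iloc[0])  # prediction columns vary by PyCaret version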