import streamlit as st
import pandas as pd
# Import from the PyCaret module that matches the task this model was trained for (classification here)
from pycaret.classification import load_model, predict_model
import os
import warnings # Added to potentially suppress warnings
import logging # Added for better debugging in the Space
# --- Page Configuration (MUST BE FIRST STREAMLIT COMMAND) ---
APP_TITLE = "my-pycaret-app"
st.set_page_config(page_title=APP_TITLE, layout="centered", initial_sidebar_state="collapsed")
# Configure simple logging for the Streamlit app
# Use Streamlit logger if available, otherwise basic config
try:
    # Attempt to get the logger specific to the Streamlit context
    logger = st.logger.get_logger(__name__)
except AttributeError:  # Fallback for older Streamlit versions or different contexts
    # Basic logging setup if the Streamlit logger isn't available
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - StreamlitApp - %(levelname)s - %(message)s')
    logger = logging.getLogger(__name__)
# --- Model Configuration ---
MODEL_FILE = "model.pkl" # Relative path within the Space
# --- PyCaret Task Module (as a string for conditional logic) ---
APP_PYCARET_TASK_MODULE = "pycaret.classification"
# --- Processed Schema (for type checking later) ---
# Feature schema captured at deployment time: widget type and, for categoricals, the allowed values
APP_SCHEMA = {'PassengerId': {'type': 'numerical'}, 'Pclass': {'type': 'numerical'}, 'Name': {'type': 'numerical'}, 'Sex': {'type': 'categorical', 'values': ['male', 'female']}, 'Age': {'type': 'numerical'}, 'SibSp': {'type': 'numerical'}, 'Parch': {'type': 'numerical'}, 'Ticket': {'type': 'numerical'}, 'Fare': {'type': 'numerical'}, 'Cabin': {'type': 'categorical', 'values': ['A', 'B', 'C']}, 'Embarked': {'type': 'categorical', 'values': ['S', 'C', 'Q']}}
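# Illustrative sketch only (not called by the form below, which hard-codes its
# widgets): one way to map a schema entry to a Streamlit widget generically.
# The helper name is made up for this example.
def _widget_from_schema(feature: str, details: dict):
    """Render a widget for one schema entry and return its value (sketch only)."""
    if details.get("type") == "categorical":
        return st.selectbox(label=feature, options=details.get("values", []), key=f"input_{feature}")
    # Numerical (and any other type) falls back to a float input
    return st.number_input(label=feature, format="%f", key=f"input_{feature}")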
# --- Load Model ---
# Use cache_resource for efficient loading
@st.cache_resource
def get_model():
logger.info(f"Attempting to load model from file: {MODEL_FILE}")
# Define the path expected by PyCaret's load_model (without extension)
model_load_path = MODEL_FILE.replace('.pkl','')
logger.info(f"Calculated PyCaret load path: '{model_load_path}'") # Escaped braces
if not os.path.exists(MODEL_FILE):
st.error(f"Model file '{MODEL_FILE}' not found in the Space repository.")
logger.error(f"Model file '{MODEL_FILE}' not found at expected path.")
return None
try:
# Suppress specific warnings during loading if needed
# warnings.filterwarnings("ignore", category=UserWarning, message=".*Trying to unpickle estimator.*")
logger.info(f"Calling PyCaret's load_model('{model_load_path}')...") # Escaped braces
# Ensure PyCaret logging doesn't interfere excessively if needed
# from pycaret.utils.generic import enable_colab
# enable_colab() # May help manage output/logging in some environments
model = load_model(model_load_path)
logger.info("PyCaret's load_model executed successfully.")
return model
except FileNotFoundError:
# Specific handling if load_model itself can't find related files (like preprocess.pkl)
st.error(f"Error loading model components for '{model_load_path}'. PyCaret's load_model failed, possibly missing auxiliary files.") # Escaped braces
logger.exception(f"PyCaret load_model failed for '{model_load_path}', likely due to missing components:") # Escaped braces
return None
except Exception as e:
# Catch other potential errors during model loading
st.error(f"An unexpected error occurred loading model '{model_load_path}': {e}") # Escaped braces around model_load_path
logger.exception("Unexpected model loading error details:") # Log full traceback
return None
# --- Load the model ---
model = get_model()
# --- App Layout ---
st.title(APP_TITLE) # Title now comes after page config
if model is None:
st.error("Model could not be loaded. Please check the application logs in the Space settings for more details. Application cannot proceed.")
else:
st.success("Model loaded successfully!") # Indicate success
st.markdown("Provide the input features below to generate a prediction using the deployed model.")
# --- Input Section ---
st.header("Model Inputs")
with st.form("prediction_form"):
# Dynamically generated widgets based on schema (now with correct indentation)
input_PassengerId = st.number_input(label='PassengerId', format='%f', key='input_PassengerId')
input_Pclass = st.number_input(label='Pclass', format='%f', key='input_Pclass')
input_Name = st.number_input(label='Name', format='%f', key='input_Name')
input_Sex = st.selectbox(label='Sex', options=['male', 'female'], key='input_Sex')
input_Age = st.number_input(label='Age', format='%f', key='input_Age')
input_SibSp = st.number_input(label='SibSp', format='%f', key='input_SibSp')
input_Parch = st.number_input(label='Parch', format='%f', key='input_Parch')
input_Ticket = st.number_input(label='Ticket', format='%f', key='input_Ticket')
input_Fare = st.number_input(label='Fare', format='%f', key='input_Fare')
input_Cabin = st.selectbox(label='Cabin', options=['A', 'B', 'C'], key='input_Cabin')
input_Embarked = st.selectbox(label='Embarked', options=['S', 'C', 'Q'], key='input_Embarked')
submitted = st.form_submit_button("π Get Prediction")
    # --- Prediction Logic & Output Section ---
    if submitted:
        st.header("Prediction Output")
        try:
            # Create a single-row DataFrame from the inputs, keyed by the original feature names.
            # The values are fetched by Streamlit via the keys assigned to the widgets above.
            input_data_dict = {'PassengerId': input_PassengerId, 'Pclass': input_Pclass, 'Name': input_Name, 'Sex': input_Sex, 'Age': input_Age, 'SibSp': input_SibSp, 'Parch': input_Parch, 'Ticket': input_Ticket, 'Fare': input_Fare, 'Cabin': input_Cabin, 'Embarked': input_Embarked}
            logger.info(f"Raw input data from form: {input_data_dict}")
            input_data = pd.DataFrame([input_data_dict])
            # Ensure correct dtypes based on the schema before prediction
            logger.info("Applying dtypes based on schema...")
            # Use the APP_SCHEMA defined earlier
            for feature, details in APP_SCHEMA.items():
                feature_type = details.get("type", "text").lower()
                if feature in input_data.columns:  # Check if the feature exists
                    try:
                        current_value = input_data[feature].iloc[0]
                        # Skip conversion if the value is already None or a NaN equivalent
                        if pd.isna(current_value):
                            continue
                        if feature_type == 'numerical':
                            # Convert to numeric, coercing errors (users might enter text)
                            input_data[feature] = pd.to_numeric(input_data[feature], errors='coerce')
                        elif feature_type == 'categorical':
                            # PyCaret often expects object/string dtype for categoricals in predict_model
                            input_data[feature] = input_data[feature].astype(str)
                        # Add elif branches for other types if needed (e.g., datetime)
                        # else:  # text
                        #     input_data[feature] = input_data[feature].astype(str)  # Ensure string type
                    except Exception as type_e:
                        logger.warning(f"Could not convert feature '{feature}' (value: {current_value}) to type '{feature_type}'. Error: {type_e}")
                        # Set to missing if the conversion fails (alternative: keep the original value)
                        input_data[feature] = pd.NA
                else:
                    logger.warning(f"Feature '{feature}' from schema not found in input form data.")
            # Handle potential NaN values from coercion or failed conversion
            if input_data.isnull().values.any():
                st.warning("Some inputs might be invalid or missing. Attempting to handle missing values (e.g., replacing numerical NaNs with 0). Check the logs for details.")
                logger.warning(f"NaN values found in input data after type conversion/validation. Filling numerical NaNs with 0. Data before fill:\n{input_data}")
                # More robust imputation might be needed depending on the model.
                # Fill only numerical NaNs with 0 and leave the rest; categoricals could use a
                # fallback value instead (see the commented sketch below).
                for feature, details in APP_SCHEMA.items():
                    # Check that the column exists before attempting to fill
                    if feature in input_data.columns and details.get("type") == "numerical" and input_data[feature].isnull().any():
                        # Assign the result back rather than calling fillna(inplace=True) on a column slice
                        input_data[feature] = input_data[feature].fillna(0)
                # input_data.fillna(0, inplace=True)  # Previous, simpler strategy
                logger.info(f"Data after filling NaN:\n{input_data}")
st.markdown("##### Input Data Sent to Model (after processing):")
st.dataframe(input_data)
# Make prediction
logger.info("Calling predict_model...")
with st.spinner("Predicting..."):
# Suppress prediction warnings if needed
# with warnings.catch_warnings():
# warnings.simplefilter("ignore")
predictions = predict_model(model, data=input_data)
logger.info("Prediction successful.")
st.markdown("##### Prediction Result:")
logger.info(f"Prediction output columns: {predictions.columns.tolist()}")
            # Display the relevant prediction columns (names depend on the PyCaret task)
            # Common columns: 'prediction_label', 'prediction_score'
            pred_col_label = 'prediction_label'
            pred_col_score = 'prediction_score'
            if pred_col_label in predictions.columns:
                st.success(f"Predicted Label: **{predictions[pred_col_label].iloc[0]}**")
                # Also show the score if available for classification
                if pred_col_score in predictions.columns and APP_PYCARET_TASK_MODULE == 'pycaret.classification':
                    st.info(f"Prediction Score: **{predictions[pred_col_score].iloc[0]:.4f}**")
            # Handle regression output (usually just a score column)
            elif pred_col_score in predictions.columns and APP_PYCARET_TASK_MODULE == 'pycaret.regression':
                st.success(f"Predicted Value: **{predictions[pred_col_score].iloc[0]:.4f}**")
            else:
                # Fallback: display the last non-input column if the standard ones aren't found
                try:
                    # Exclude input columns if they are present in the output DataFrame
                    output_columns = [col for col in predictions.columns if col not in input_data.columns]
                    if output_columns:
                        last_col_name = output_columns[-1]
                        st.info(f"Prediction Output (Column: '{last_col_name}'): **{predictions[last_col_name].iloc[0]}**")
                        logger.warning(f"Could not find standard prediction columns. Displaying last non-input column: '{last_col_name}'")
                    else:  # If only input columns are returned (unlikely)
                        st.warning("Prediction output seems to only contain input columns.")
                except IndexError:
                    st.error("Prediction result DataFrame is empty or has an unexpected format.")
                    logger.error("Prediction result DataFrame is empty or has an unexpected format.")

            # Optionally show the full prediction output
            with st.expander("View Full Prediction Output DataFrame"):
                st.dataframe(predictions)
        except Exception as e:
            st.error(f"An error occurred during prediction: {e}")
            logger.exception("Prediction error details:")  # Log the full traceback