Deploy PyCaret model baseline_dt_20250426_212853.pkl with improved UI
app.py (CHANGED)
@@ -9,7 +9,7 @@ import logging # Added for better debugging in the Space
 
 # --- Page Configuration (MUST BE FIRST STREAMLIT COMMAND) ---
 APP_TITLE = "my-pycaret-app"
-st.set_page_config(page_title=APP_TITLE, layout="centered")
+st.set_page_config(page_title=APP_TITLE, layout="centered", initial_sidebar_state="collapsed")
 
 # Configure simple logging for the Streamlit app
 # Use Streamlit logger if available, otherwise basic config
@@ -25,6 +25,10 @@ except AttributeError: # Fallback for older Streamlit versions or different cont
 # --- Model Configuration ---
 MODEL_FILE = "model.pkl" # Relative path within the Space
 
+# --- Processed Schema (for type checking later) ---
+# Use double braces to embed the schema dict correctly in the generated code
+APP_SCHEMA = {'PassengerId': {'type': 'numerical'}, 'Pclass': {'type': 'numerical'}, 'Name': {'type': 'numerical'}, 'Sex': {'type': 'numerical'}, 'Age': {'type': 'numerical'}, 'SibSp': {'type': 'numerical'}, 'Parch': {'type': 'numerical'}, 'Ticket': {'type': 'numerical'}, 'Fare': {'type': 'numerical'}, 'Cabin': {'type': 'numerical'}, 'Embarked': {'type': 'numerical'}, 'Survived': {'type': 'numerical'}}
+
 
 # --- Load Model ---
 # Use cache_resource for efficient loading
@@ -64,34 +68,35 @@ def get_model():
 model = get_model()
 
 # --- App Layout ---
-st.title(APP_TITLE) # Title now comes after page config
+st.title(APP_TITLE) # Title now comes after page config
 
 if model is None:
     st.error("Model could not be loaded. Please check the application logs in the Space settings for more details. Application cannot proceed.")
 else:
     st.success("Model loaded successfully!") # Indicate success
-    st.
+    st.markdown("Provide the input features below to generate a prediction using the deployed model.")
 
-    # --- Input
+    # --- Input Section ---
+    st.header("Model Inputs")
     with st.form("prediction_form"):
-        st.subheader("Input Features:")
         # Dynamically generated widgets based on schema
         input_PassengerId = st.number_input(label='PassengerId', format='%f', key='input_PassengerId')
-
-
-
-
-
-
-
-
-
-
-
-        submitted = st.form_submit_button("
-
-    # --- Prediction Logic ---
+        input_Pclass = st.number_input(label='Pclass', format='%f', key='input_Pclass')
+        input_Name = st.number_input(label='Name', format='%f', key='input_Name')
+        input_Sex = st.number_input(label='Sex', format='%f', key='input_Sex')
+        input_Age = st.number_input(label='Age', format='%f', key='input_Age')
+        input_SibSp = st.number_input(label='SibSp', format='%f', key='input_SibSp')
+        input_Parch = st.number_input(label='Parch', format='%f', key='input_Parch')
+        input_Ticket = st.number_input(label='Ticket', format='%f', key='input_Ticket')
+        input_Fare = st.number_input(label='Fare', format='%f', key='input_Fare')
+        input_Cabin = st.number_input(label='Cabin', format='%f', key='input_Cabin')
+        input_Embarked = st.number_input(label='Embarked', format='%f', key='input_Embarked')
+        input_Survived = st.number_input(label='Survived', format='%f', key='input_Survived')
+        submitted = st.form_submit_button("📊 Get Prediction")
+
+    # --- Prediction Logic & Output Section ---
     if submitted:
+        st.header("Prediction Output")
         try:
            # Create DataFrame from inputs using original feature names as keys
            # The values are automatically fetched by Streamlit using the keys assigned to widgets
@@ -101,35 +106,49 @@ else:
 
            # Ensure correct dtypes based on schema before prediction
            logger.info("Applying dtypes based on schema...")
-           # Use
-           for feature,
+           # Use APP_SCHEMA defined earlier
+           for feature, details in APP_SCHEMA.items():
+               feature_type = details.get("type", "text").lower()
               if feature in input_data.columns: # Check if feature exists
                   try:
-
+                      current_value = input_data[feature].iloc[0]
+                      # Skip conversion if value is already None or NaN equivalent
+                      if pd.isna(current_value):
+                          continue
+
+                      if feature_type == 'numerical':
                          # Convert to numeric, coercing errors (users might enter text)
                          input_data[feature] = pd.to_numeric(input_data[feature], errors='coerce')
-
-
+                      elif feature_type == 'categorical':
+                          # Ensure categorical inputs are treated as strings by the model if needed
+                          # PyCaret often expects object/string type for categoricals in predict_model
+                          input_data[feature] = input_data[feature].astype(str)
+                      # Add elif for other types if needed (e.g., datetime)
+                      # else: # text
                       # input_data[feature] = input_data[feature].astype(str) # Ensure string type
+
                   except Exception as type_e:
-                      logger.warning(f"Could not convert feature '{feature}' to type '{
+                      logger.warning(f"Could not convert feature '{feature}' (value: {current_value}) to type '{feature_type}'. Error: {type_e}")
                       # Decide how to handle type conversion errors, e.g., set to NaN or keep original
                       input_data[feature] = pd.NA # Set to missing if conversion fails
-
               else:
                   logger.warning(f"Feature '{feature}' from schema not found in input form data.")
 
 
            # Handle potential NaN values from coercion or failed conversion
            if input_data.isnull().values.any():
-               st.warning("Some inputs might be invalid or missing. Attempting to handle missing values (e.g., replacing with 0).")
-               logger.warning(f"NaN values found in input data after type conversion/validation. Filling with 0. Data before fill:\n{input_data}")
+               st.warning("Some inputs might be invalid or missing. Attempting to handle missing values (e.g., replacing with 0 for numerical). Check logs for details.")
+               logger.warning(f"NaN values found in input data after type conversion/validation. Filling numerical with 0. Data before fill:\n{input_data}")
                # More robust imputation might be needed depending on the model
-
-
+               # Fill only numerical NaNs with 0, leave others? Or use mode for categoricals?
+               for feature, details in APP_SCHEMA.items():
+                   if details.get("type") == "numerical" and input_data[feature].isnull().any():
+                       input_data[feature].fillna(0, inplace=True)
+               # input_data.fillna(0, inplace=True) # Previous simpler strategy
+               logger.info(f"Data after filling NaN:\n{input_data}")
 
 
-           st.
+           st.markdown("##### Input Data Sent to Model (after processing):")
            st.dataframe(input_data)
 
            # Make prediction
@@ -141,7 +160,7 @@ else:
            predictions = predict_model(model, data=input_data)
            logger.info("Prediction successful.")
 
-           st.
+           st.markdown("##### Prediction Result:")
            logger.info(f"Prediction output columns: {predictions.columns.tolist()}")
 
            # Display relevant prediction columns (adjust based on PyCaret task)
@@ -151,21 +170,30 @@ else:
 
            if pred_col_label in predictions.columns:
                st.success(f"Predicted Label: **{predictions[pred_col_label].iloc[0]}**")
-
-
+               # Also show score if available for classification
+               if pred_col_score in predictions.columns and pycaret_task_module == 'pycaret.classification':
+                   st.info(f"Prediction Score: **{predictions[pred_col_score].iloc[0]:.4f}**")
+               # Handle regression output (usually just score)
+               elif pred_col_score in predictions.columns and pycaret_task_module == 'pycaret.regression':
+                   st.success(f"Predicted Value: **{predictions[pred_col_score].iloc[0]:.4f}**")
            else:
                # Fallback: Display the last column as prediction if specific ones aren't found
                try:
-
-
-
+                   # Exclude input columns if they are present in the output df
+                   output_columns = [col for col in predictions.columns if col not in input_data.columns]
+                   if output_columns:
+                       last_col_name = output_columns[-1]
+                       st.info(f"Prediction Output (Column: '{last_col_name}'): **{predictions[last_col_name].iloc[0]}**")
+                       logger.warning(f"Could not find standard prediction columns. Displaying last non-input column: '{last_col_name}'")
+                   else: # If only input columns are returned (unlikely)
+                       st.warning("Prediction output seems to only contain input columns.")
                except IndexError:
-                   st.error("Prediction result DataFrame is empty.")
-                   logger.error("Prediction result DataFrame is empty.")
+                   st.error("Prediction result DataFrame is empty or has unexpected format.")
+                   logger.error("Prediction result DataFrame is empty or has unexpected format.")
 
 
            # Show full prediction output optionally
-           with st.expander("
+           with st.expander("View Full Prediction Output DataFrame"):
                st.dataframe(predictions)
 
        except Exception as e:
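For context, the prediction path the form drives in the Space can also be exercised directly in a Python session. The sketch below is a minimal, hypothetical example (not part of the committed app.py): it loads the same model.pkl artifact with PyCaret and sends a one-row DataFrame through predict_model, mirroring the numeric coercion the app performs. The sample feature values are made up, and the names of the prediction columns depend on the PyCaret version (prediction_label / prediction_score in 3.x, Label / Score in 2.x).

# Minimal local sketch, assuming PyCaret is installed and the Space's model.pkl is in the working directory.
import pandas as pd
from pycaret.classification import load_model, predict_model

# load_model appends ".pkl", so pass the artifact name without the extension.
model = load_model("model")

# One-row frame with the same columns the form collects; values are illustrative only.
row = pd.DataFrame([{
    "PassengerId": 1, "Pclass": 3, "Name": 0, "Sex": 0, "Age": 22.0,
    "SibSp": 1, "Parch": 0, "Ticket": 0, "Fare": 7.25, "Cabin": 0,
    "Embarked": 0, "Survived": 0,
}])

# Coerce everything to numeric and fill gaps with 0, matching the app's schema handling.
row = row.apply(pd.to_numeric, errors="coerce").fillna(0)

predictions = predict_model(model, data=row)
print(predictions.iloc[0])  # prediction columns vary by PyCaret version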