Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -409,6 +409,27 @@ def validate_model(model_path, df, target, features, test_size):
|
|
409 |
st.error(f"Validation failed: {str(e)}")
|
410 |
return None, None
|
411 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
# --------------------------
|
413 |
# Sidebar Navigation
|
414 |
# --------------------------
|
@@ -1039,40 +1060,106 @@ elif app_mode == "Model Training":
|
|
1039 |
st.metric("R2", f"{validation_metrics['r2']:.2f}")
|
1040 |
|
1041 |
elif app_mode == "Predictions":
|
1042 |
-
st.title("๐ฎ Predictive Analytics")
|
1043 |
-
|
1044 |
-
if st.session_state.model is None:
|
1045 |
st.warning("Please train a model first")
|
1046 |
st.stop()
|
1047 |
-
|
1048 |
-
|
1049 |
-
|
1050 |
-
|
1051 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1052 |
with col1:
|
1053 |
-
|
1054 |
-
|
1055 |
-
for feature in model.feature_names_in_:
|
1056 |
-
input_data[feature] = st.number_input(feature)
|
1057 |
-
|
1058 |
with col2:
|
1059 |
-
st.subheader("
|
1060 |
-
|
1061 |
-
|
1062 |
-
|
1063 |
-
|
1064 |
-
|
1065 |
-
|
1066 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1067 |
explainer = shap.TreeExplainer(model)
|
1068 |
-
shap_values = explainer.shap_values(
|
1069 |
-
|
1070 |
-
|
1071 |
-
|
1072 |
-
|
1073 |
-
|
1074 |
-
|
1075 |
-
|
1076 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1077 |
except Exception as e:
|
1078 |
-
st.
|
|
|
|
|
|
|
|
409 |
st.error(f"Validation failed: {str(e)}")
|
410 |
return None, None
|
411 |
|
412 |
+
# Prediction helper function
|
413 |
+
def prediction_input_form(features, default_values=None):
    """Render one numeric input widget per feature and collect the values.

    Args:
        features (list): Feature names. Each one gets its own
            ``st.number_input`` labelled ``"<feature>:"``.
        default_values (dict, optional): Initial widget value keyed by
            feature name. A feature without an entry starts at 0.0.

    Returns:
        dict: Maps every feature name to the value returned by its widget.
    """
    # A missing or empty mapping means every widget falls back to 0.0.
    presets = default_values or {}
    return {
        name: st.number_input(f"{name}:", value=presets.get(name, 0.0))
        for name in features
    }
|
432 |
+
|
433 |
# --------------------------
|
434 |
# Sidebar Navigation
|
435 |
# --------------------------
|
|
|
1060 |
st.metric("R2", f"{validation_metrics['r2']:.2f}")
|
1061 |
|
1062 |
elif app_mode == "Predictions":
|
1063 |
+
st.title("๐ฎ Predictive Analytics - Informed Business Decisions")
|
1064 |
+
|
1065 |
+
if st.session_state.get("model") is None:
|
1066 |
st.warning("Please train a model first")
|
1067 |
st.stop()
|
1068 |
+
|
1069 |
+
model_data = st.session_state.model # Get the entire dictionary
|
1070 |
+
model = model_data['model'] # Access model
|
1071 |
+
scaler = model_data['scaler']
|
1072 |
+
column_order = model_data['column_order']
|
1073 |
+
imputer_numerical = model_data['imputer_numerical']
|
1074 |
+
features = model_data['features']
|
1075 |
+
problem_type = model_data['problem_type']
|
1076 |
+
|
1077 |
+
enhance_section_title("Input Parameters", "๐")
|
1078 |
+
|
1079 |
+
# Generate Input Forms
|
1080 |
+
col1, col2 = st.columns([2, 1])
|
1081 |
+
|
1082 |
with col1:
|
1083 |
+
input_data = prediction_input_form(features)
|
1084 |
+
|
|
|
|
|
|
|
1085 |
with col2:
|
1086 |
+
st.subheader("Data Overview")
|
1087 |
+
input_df = pd.DataFrame([input_data]) #Make DataFrame
|
1088 |
+
st.dataframe(input_df,use_container_width=True) #DataFrame of the input to see it
|
1089 |
+
|
1090 |
+
# Predicts Function and Displays Result
|
1091 |
+
if st.button("Generate Prediction & Insights"):
|
1092 |
+
try:
|
1093 |
+
# 1. Create DataFrame from input
|
1094 |
+
input_df = pd.DataFrame([input_data])
|
1095 |
+
|
1096 |
+
# 2. Impute missing values
|
1097 |
+
numerical_features = input_df.select_dtypes(include=np.number).columns
|
1098 |
+
input_df[numerical_features] = imputer_numerical.transform(input_df[numerical_features])
|
1099 |
+
|
1100 |
+
# 3. One-hot encode (handle unseen categories)
|
1101 |
+
categorical_features = input_df.select_dtypes(exclude=np.number).columns
|
1102 |
+
input_df = pd.get_dummies(input_df, columns=categorical_features, dummy_na=False) # dummy_na = False. We imputed already.
|
1103 |
+
|
1104 |
+
# 4. Ensure correct column order
|
1105 |
+
# Add missing columns with 0 values
|
1106 |
+
for col in column_order:
|
1107 |
+
if col not in input_df.columns:
|
1108 |
+
input_df[col] = 0
|
1109 |
+
# Reorder Columns
|
1110 |
+
input_df = input_df[column_order]
|
1111 |
+
|
1112 |
+
# 5. Scale the input
|
1113 |
+
scaled_input = scaler.transform(input_df)
|
1114 |
+
|
1115 |
+
# 6. Make prediction
|
1116 |
+
prediction = model.predict(scaled_input)[0]
|
1117 |
+
|
1118 |
+
# 7. Display Prediction
|
1119 |
+
enhance_section_title("Prediction Results", "๐")
|
1120 |
+
st.subheader("Model Prediction:")
|
1121 |
+
|
1122 |
+
if problem_type == "Classification":
|
1123 |
+
st.metric("Predicted Class", str(prediction))
|
1124 |
+
else:
|
1125 |
+
st.metric("Predicted Value", f"{prediction:.2f}")
|
1126 |
+
|
1127 |
+
# 8. Feature Explanation (SHAP)
|
1128 |
+
enhance_section_title("Insights", "๐ก")
|
1129 |
+
|
1130 |
+
if problem_type == "Classification":
|
1131 |
explainer = shap.TreeExplainer(model)
|
1132 |
+
shap_values = explainer.shap_values(scaled_input) # Use the scaled input
|
1133 |
+
# class_names = [str(i) for i in range(len(shap_values))] # Dynamic class names - not needed for force plot
|
1134 |
+
|
1135 |
+
fig = shap.force_plot(explainer.expected_value[1], shap_values[1], input_df, matplotlib=False,link="logit") # shap_values[1] for class 1 - force plot
|
1136 |
+
st.components.v1.html(shap.getjs() + fig.html(), height=400, width=900) # Adjust height and width as needed.
|
1137 |
+
|
1138 |
+
else:
|
1139 |
+
explainer = shap.TreeExplainer(model) # Regression
|
1140 |
+
shap_values = explainer.shap_values(scaled_input) # Use the scaled input
|
1141 |
+
|
1142 |
+
fig = shap.force_plot(explainer.expected_value, shap_values, input_df, matplotlib=False) # shap_values single array for regression
|
1143 |
+
st.components.v1.html(shap.getjs() + fig.html(), height=400, width=900) # Adjust height and width as needed.
|
1144 |
+
|
1145 |
+
st.write("The visualization above explains how each feature contributed to the final prediction.")
|
1146 |
+
|
1147 |
+
# 9. Add Permutation Feature Importance (for more global understanding)
|
1148 |
+
try:
|
1149 |
+
enhance_section_title("Global Feature Importance", "๐")
|
1150 |
+
X = pd.DataFrame(scaler.transform(pd.get_dummies(pd.DataFrame(imputer_numerical.transform(input_df), columns=input_df.columns))), columns=input_df.columns) # Apply preprocessing for permutation
|
1151 |
+
#X = pd.DataFrame(scaler.transform(input_df), columns = input_df.columns)
|
1152 |
+
#X = input_df[input_df.columns]
|
1153 |
+
X_train = model_data['X_train'] #Get X train
|
1154 |
+
y_train = model_data['y_train'] #Get Y train
|
1155 |
+
result = permutation_importance(model, X, input_df, n_repeats=10, random_state=42)
|
1156 |
+
importance = result.importances_mean
|
1157 |
+
|
1158 |
+
fig_importance = px.bar(x=importance, y=features, orientation='h', title="Permutation Feature Importance")
|
1159 |
+
st.plotly_chart(fig_importance)
|
1160 |
+
|
1161 |
except Exception as e:
|
1162 |
+
st.warning(f"Could not calculate permutation feature importance: {e}")
|
1163 |
+
|
1164 |
+
except Exception as e:
|
1165 |
+
st.error(f"Prediction failed: {str(e)}")
|