Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -945,19 +945,18 @@ elif app_mode == "EDA":
|
|
| 945 |
elif app_mode == "Model Training":
|
| 946 |
st.title("🤖 Intelligent Model Training")
|
| 947 |
|
| 948 |
-
|
| 949 |
if 'cleaned_data' not in st.session_state:
|
| 950 |
st.warning("No cleaned data found! Please either:")
|
| 951 |
-
|
| 952 |
col1, col2 = st.columns(2)
|
| 953 |
with col1:
|
| 954 |
if st.button("↩️ Go to Data Cleaning"):
|
| 955 |
st.session_state.app_mode = "Data Cleaning"
|
| 956 |
st.experimental_rerun()
|
| 957 |
-
|
| 958 |
with col2:
|
| 959 |
-
uploaded_clean = st.file_uploader("📤 Or upload clean data",
|
| 960 |
-
type=["csv", "xlsx"])
|
| 961 |
if uploaded_clean:
|
| 962 |
try:
|
| 963 |
st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
|
|
@@ -965,20 +964,20 @@ elif app_mode == "Model Training":
|
|
| 965 |
st.experimental_rerun()
|
| 966 |
except Exception as e:
|
| 967 |
st.error(f"Invalid file: {str(e)}")
|
| 968 |
-
|
| 969 |
st.stop() # Halt execution until resolved
|
| 970 |
|
| 971 |
# Only reaches here if cleaned_data exists
|
| 972 |
df = st.session_state.cleaned_data.copy()
|
| 973 |
-
|
| 974 |
# Model Setup
|
| 975 |
col1, col2, col3 = st.columns(3)
|
| 976 |
with col1:
|
| 977 |
target = st.selectbox("Select Target Variable", df.columns)
|
| 978 |
-
problem_type = st.selectbox("Problem Type", ["Classification", "Regression", "Multiclass"])
|
| 979 |
with col2:
|
| 980 |
available_features = df.columns.drop(target)
|
| 981 |
-
features = st.multiselect("Select Features", available_features, default=list(available_features))
|
| 982 |
with col3:
|
| 983 |
test_size = st.slider("Test Size", 0.1, 0.5, 0.2)
|
| 984 |
|
|
@@ -988,25 +987,25 @@ elif app_mode == "Model Training":
|
|
| 988 |
elif problem_type == "Classification":
|
| 989 |
model_type = st.selectbox("Select Classification Model", ["Random Forest", "Gradient Boosting", "Neural Network"])
|
| 990 |
elif problem_type == "Multiclass":
|
| 991 |
-
model_type = st.selectbox("Select Multiclass Model", ["Logistic Regression", "Support Vector Machine", "Random Forest"])
|
| 992 |
else:
|
| 993 |
-
model_type = None
|
| 994 |
|
| 995 |
# Hyperparameter Configuration - Dynamic based on Model Type
|
| 996 |
st.subheader("Hyperparameter Configuration")
|
| 997 |
model_params = {}
|
| 998 |
|
| 999 |
-
if model_type == "Neural Network":
|
| 1000 |
hidden_layers = st.text_input("Hidden Layer Sizes (e.g., 50,50 for two layers of 50 neurons)", "50,50")
|
| 1001 |
activation = st.selectbox("Activation Function", ["relu", "tanh", "logistic"])
|
| 1002 |
alpha = st.number_input("L2 Regularization (Alpha)", value=0.0001)
|
| 1003 |
|
| 1004 |
-
#Process the hidden layers string to a tuple of ints
|
| 1005 |
try:
|
| 1006 |
hidden_layer_sizes = tuple(map(int, hidden_layers.split(',')))
|
| 1007 |
model_params['hidden_layer_sizes'] = hidden_layer_sizes
|
| 1008 |
except ValueError:
|
| 1009 |
-
st.error("Invalid format for Hidden Layer Sizes.
|
| 1010 |
|
| 1011 |
model_params['activation'] = activation
|
| 1012 |
model_params['alpha'] = alpha
|
|
@@ -1019,6 +1018,7 @@ elif app_mode == "Model Training":
|
|
| 1019 |
model_params['n_estimators'] = n_estimators
|
| 1020 |
model_params['learning_rate'] = learning_rate
|
| 1021 |
model_params['max_depth'] = max_depth
|
|
|
|
| 1022 |
elif model_type == "Logistic Regression":
|
| 1023 |
c_value = st.number_input("C (Regularization)", value=1.0)
|
| 1024 |
model_params['C'] = c_value
|
|
@@ -1035,12 +1035,10 @@ elif app_mode == "Model Training":
|
|
| 1035 |
model_params['n_estimators'] = n_estimators
|
| 1036 |
model_params['max_depth'] = max_depth
|
| 1037 |
|
| 1038 |
-
|
| 1039 |
-
|
| 1040 |
use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")
|
| 1041 |
|
| 1042 |
# In Model Training section - Fix indentation for training logic
|
| 1043 |
-
|
| 1044 |
if not features:
|
| 1045 |
st.error("Please select at least one feature.")
|
| 1046 |
st.stop()
|
|
@@ -1051,52 +1049,49 @@ elif app_mode == "Model Training":
|
|
| 1051 |
df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search
|
| 1052 |
)
|
| 1053 |
|
| 1054 |
-
|
| 1055 |
-
|
| 1056 |
-
|
| 1057 |
-
|
| 1058 |
-
|
| 1059 |
-
|
| 1060 |
-
|
| 1061 |
-
|
| 1062 |
-
|
| 1063 |
-
|
| 1064 |
-
|
| 1065 |
-
|
| 1066 |
-
|
| 1067 |
-
|
| 1068 |
-
|
| 1069 |
-
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
|
| 1076 |
-
|
| 1077 |
-
|
| 1078 |
|
| 1079 |
-
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
# A
|
| 1084 |
-
|
| 1085 |
-
# Additional model display code...
|
| 1086 |
|
|
|
|
| 1087 |
|
| 1088 |
# Feature Importance
|
| 1089 |
st.subheader("Feature Importance")
|
| 1090 |
try:
|
| 1091 |
fig_importance = px.bar(
|
| 1092 |
x=importance,
|
| 1093 |
-
y=column_order,
|
| 1094 |
orientation='h',
|
| 1095 |
title="Feature Importance"
|
| 1096 |
)
|
| 1097 |
st.plotly_chart(fig_importance, use_container_width=True)
|
| 1098 |
except Exception as e:
|
| 1099 |
-
st.warning(f"Could not display feature importance: {e}")
|
| 1100 |
|
| 1101 |
# Explainable AI (Placeholder)
|
| 1102 |
st.subheader("Explainable AI (XAI)")
|
|
|
|
| 945 |
elif app_mode == "Model Training":
|
| 946 |
st.title("🤖 Intelligent Model Training")
|
| 947 |
|
| 948 |
+
# Universal check for all dependent pages
|
| 949 |
if 'cleaned_data' not in st.session_state:
|
| 950 |
st.warning("No cleaned data found! Please either:")
|
| 951 |
+
|
| 952 |
col1, col2 = st.columns(2)
|
| 953 |
with col1:
|
| 954 |
if st.button("↩️ Go to Data Cleaning"):
|
| 955 |
st.session_state.app_mode = "Data Cleaning"
|
| 956 |
st.experimental_rerun()
|
| 957 |
+
|
| 958 |
with col2:
|
| 959 |
+
uploaded_clean = st.file_uploader("📤 Or upload clean data", type=["csv", "xlsx"])
|
|
|
|
| 960 |
if uploaded_clean:
|
| 961 |
try:
|
| 962 |
st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
|
|
|
|
| 964 |
st.experimental_rerun()
|
| 965 |
except Exception as e:
|
| 966 |
st.error(f"Invalid file: {str(e)}")
|
| 967 |
+
|
| 968 |
st.stop() # Halt execution until resolved
|
| 969 |
|
| 970 |
# Only reaches here if cleaned_data exists
|
| 971 |
df = st.session_state.cleaned_data.copy()
|
| 972 |
+
|
| 973 |
# Model Setup
|
| 974 |
col1, col2, col3 = st.columns(3)
|
| 975 |
with col1:
|
| 976 |
target = st.selectbox("Select Target Variable", df.columns)
|
| 977 |
+
problem_type = st.selectbox("Problem Type", ["Classification", "Regression", "Multiclass"]) # Added Multiclass
|
| 978 |
with col2:
|
| 979 |
available_features = df.columns.drop(target)
|
| 980 |
+
features = st.multiselect("Select Features", available_features, default=list(available_features)) # Select all as default
|
| 981 |
with col3:
|
| 982 |
test_size = st.slider("Test Size", 0.1, 0.5, 0.2)
|
| 983 |
|
|
|
|
| 987 |
elif problem_type == "Classification":
|
| 988 |
model_type = st.selectbox("Select Classification Model", ["Random Forest", "Gradient Boosting", "Neural Network"])
|
| 989 |
elif problem_type == "Multiclass":
|
| 990 |
+
model_type = st.selectbox("Select Multiclass Model", ["Logistic Regression", "Support Vector Machine", "Random Forest"]) # Added SVM and Logistic Regression
|
| 991 |
else:
|
| 992 |
+
model_type = None # handle this
|
| 993 |
|
| 994 |
# Hyperparameter Configuration - Dynamic based on Model Type
|
| 995 |
st.subheader("Hyperparameter Configuration")
|
| 996 |
model_params = {}
|
| 997 |
|
| 998 |
+
if model_type == "Neural Network": # Add options for NN parameters
|
| 999 |
hidden_layers = st.text_input("Hidden Layer Sizes (e.g., 50,50 for two layers of 50 neurons)", "50,50")
|
| 1000 |
activation = st.selectbox("Activation Function", ["relu", "tanh", "logistic"])
|
| 1001 |
alpha = st.number_input("L2 Regularization (Alpha)", value=0.0001)
|
| 1002 |
|
| 1003 |
+
# Process the hidden layers string to a tuple of ints
|
| 1004 |
try:
|
| 1005 |
hidden_layer_sizes = tuple(map(int, hidden_layers.split(',')))
|
| 1006 |
model_params['hidden_layer_sizes'] = hidden_layer_sizes
|
| 1007 |
except ValueError:
|
| 1008 |
+
st.error("Invalid format for Hidden Layer Sizes. Use comma-separated integers (e.g., 50,50)")
|
| 1009 |
|
| 1010 |
model_params['activation'] = activation
|
| 1011 |
model_params['alpha'] = alpha
|
|
|
|
| 1018 |
model_params['n_estimators'] = n_estimators
|
| 1019 |
model_params['learning_rate'] = learning_rate
|
| 1020 |
model_params['max_depth'] = max_depth
|
| 1021 |
+
|
| 1022 |
elif model_type == "Logistic Regression":
|
| 1023 |
c_value = st.number_input("C (Regularization)", value=1.0)
|
| 1024 |
model_params['C'] = c_value
|
|
|
|
| 1035 |
model_params['n_estimators'] = n_estimators
|
| 1036 |
model_params['max_depth'] = max_depth
|
| 1037 |
|
|
|
|
|
|
|
| 1038 |
use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")
|
| 1039 |
|
| 1040 |
# In Model Training section - Fix indentation for training logic
|
| 1041 |
+
if st.button("Train Model"):
|
| 1042 |
if not features:
|
| 1043 |
st.error("Please select at least one feature.")
|
| 1044 |
st.stop()
|
|
|
|
| 1049 |
df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search
|
| 1050 |
)
|
| 1051 |
|
| 1052 |
+
if model: # Only proceed if training was successful
|
| 1053 |
+
st.success("Model trained successfully!")
|
| 1054 |
+
|
| 1055 |
+
# Display Metrics
|
| 1056 |
+
st.subheader("Model Evaluation Metrics")
|
| 1057 |
+
if problem_type in ["Classification", "Multiclass"]: # Combined here
|
| 1058 |
+
st.metric("Accuracy", f"{metrics['accuracy']:.2%}")
|
| 1059 |
+
|
| 1060 |
+
# Confusion Matrix Visualization
|
| 1061 |
+
st.subheader("Confusion Matrix")
|
| 1062 |
+
cm = metrics['confusion_matrix']
|
| 1063 |
+
class_names = [str(i) for i in np.unique(df[target])] # Get original class names
|
| 1064 |
+
fig_cm = px.imshow(cm,
|
| 1065 |
+
labels=dict(x="Predicted", y="Actual"),
|
| 1066 |
+
x=class_names,
|
| 1067 |
+
y=class_names,
|
| 1068 |
+
color_continuous_scale="Viridis")
|
| 1069 |
+
st.plotly_chart(fig_cm, use_container_width=True)
|
| 1070 |
+
|
| 1071 |
+
# Classification Report
|
| 1072 |
+
st.subheader("Classification Report")
|
| 1073 |
+
report = metrics['classification_report']
|
| 1074 |
+
report_df = pd.DataFrame(report).transpose()
|
| 1075 |
+
st.dataframe(report_df)
|
| 1076 |
|
| 1077 |
+
else:
|
| 1078 |
+
st.metric("MSE", f"{metrics['mse']:.2f}")
|
| 1079 |
+
st.metric("R2", f"{metrics['r2']:.2f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1080 |
|
| 1081 |
+
# Additional model display code...
|
| 1082 |
|
| 1083 |
# Feature Importance
|
| 1084 |
st.subheader("Feature Importance")
|
| 1085 |
try:
|
| 1086 |
fig_importance = px.bar(
|
| 1087 |
x=importance,
|
| 1088 |
+
y=column_order, # Use stored column order
|
| 1089 |
orientation='h',
|
| 1090 |
title="Feature Importance"
|
| 1091 |
)
|
| 1092 |
st.plotly_chart(fig_importance, use_container_width=True)
|
| 1093 |
except Exception as e:
|
| 1094 |
+
st.warning(f"Could not display feature importance: {e}")
|
| 1095 |
|
| 1096 |
# Explainable AI (Placeholder)
|
| 1097 |
st.subheader("Explainable AI (XAI)")
|