CosmickVisions committed on
Commit
a11a4d6
·
verified ·
1 Parent(s): 312ac91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -50
app.py CHANGED
@@ -945,19 +945,18 @@ elif app_mode == "EDA":
945
  elif app_mode == "Model Training":
946
  st.title("🤖 Intelligent Model Training")
947
 
948
- # Universal check for all dependent pages
949
  if 'cleaned_data' not in st.session_state:
950
  st.warning("No cleaned data found! Please either:")
951
-
952
  col1, col2 = st.columns(2)
953
  with col1:
954
  if st.button("↩️ Go to Data Cleaning"):
955
  st.session_state.app_mode = "Data Cleaning"
956
  st.experimental_rerun()
957
-
958
  with col2:
959
- uploaded_clean = st.file_uploader("📤 Or upload clean data",
960
- type=["csv", "xlsx"])
961
  if uploaded_clean:
962
  try:
963
  st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
@@ -965,20 +964,20 @@ elif app_mode == "Model Training":
965
  st.experimental_rerun()
966
  except Exception as e:
967
  st.error(f"Invalid file: {str(e)}")
968
-
969
  st.stop() # Halt execution until resolved
970
 
971
  # Only reaches here if cleaned_data exists
972
  df = st.session_state.cleaned_data.copy()
973
-
974
  # Model Setup
975
  col1, col2, col3 = st.columns(3)
976
  with col1:
977
  target = st.selectbox("Select Target Variable", df.columns)
978
- problem_type = st.selectbox("Problem Type", ["Classification", "Regression", "Multiclass"]) #Added Multiclass
979
  with col2:
980
  available_features = df.columns.drop(target)
981
- features = st.multiselect("Select Features", available_features, default=list(available_features)) # Select all as default
982
  with col3:
983
  test_size = st.slider("Test Size", 0.1, 0.5, 0.2)
984
 
@@ -988,25 +987,25 @@ elif app_mode == "Model Training":
988
  elif problem_type == "Classification":
989
  model_type = st.selectbox("Select Classification Model", ["Random Forest", "Gradient Boosting", "Neural Network"])
990
  elif problem_type == "Multiclass":
991
- model_type = st.selectbox("Select Multiclass Model", ["Logistic Regression", "Support Vector Machine", "Random Forest"]) #Added SVM and Logistic Regression
992
  else:
993
- model_type = None #handle this
994
 
995
  # Hyperparameter Configuration - Dynamic based on Model Type
996
  st.subheader("Hyperparameter Configuration")
997
  model_params = {}
998
 
999
- if model_type == "Neural Network": #Add options for NN parameters
1000
  hidden_layers = st.text_input("Hidden Layer Sizes (e.g., 50,50 for two layers of 50 neurons)", "50,50")
1001
  activation = st.selectbox("Activation Function", ["relu", "tanh", "logistic"])
1002
  alpha = st.number_input("L2 Regularization (Alpha)", value=0.0001)
1003
 
1004
- #Process the hidden layers string to a tuple of ints
1005
  try:
1006
  hidden_layer_sizes = tuple(map(int, hidden_layers.split(',')))
1007
  model_params['hidden_layer_sizes'] = hidden_layer_sizes
1008
  except ValueError:
1009
- st.error("Invalid format for Hidden Layer Sizes. Use comma-separated integers (e.g., 50,50)")
1010
 
1011
  model_params['activation'] = activation
1012
  model_params['alpha'] = alpha
@@ -1019,6 +1018,7 @@ elif app_mode == "Model Training":
1019
  model_params['n_estimators'] = n_estimators
1020
  model_params['learning_rate'] = learning_rate
1021
  model_params['max_depth'] = max_depth
 
1022
  elif model_type == "Logistic Regression":
1023
  c_value = st.number_input("C (Regularization)", value=1.0)
1024
  model_params['C'] = c_value
@@ -1035,12 +1035,10 @@ elif app_mode == "Model Training":
1035
  model_params['n_estimators'] = n_estimators
1036
  model_params['max_depth'] = max_depth
1037
 
1038
-
1039
-
1040
  use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")
1041
 
1042
  # In Model Training section - Fix indentation for training logic
1043
- if st.button("Train Model"):
1044
  if not features:
1045
  st.error("Please select at least one feature.")
1046
  st.stop()
@@ -1051,52 +1049,49 @@ elif app_mode == "Model Training":
1051
  df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search
1052
  )
1053
 
1054
- if model: # Only proceed if training was successful
1055
- st.success("Model trained successfully!")
1056
-
1057
- # Display Metrics
1058
- st.subheader("Model Evaluation Metrics")
1059
- if problem_type in ["Classification", "Multiclass"]: # Combined here
1060
- st.metric("Accuracy", f"{metrics['accuracy']:.2%}")
1061
-
1062
- # Confusion Matrix Visualization
1063
- st.subheader("Confusion Matrix")
1064
- cm = metrics['confusion_matrix']
1065
- class_names = [str(i) for i in np.unique(df[target])] # Get original class names
1066
- fig_cm = px.imshow(cm,
1067
- labels=dict(x="Predicted", y="Actual"),
1068
- x=class_names,
1069
- y=class_names,
1070
- color_continuous_scale="Viridis")
1071
- st.plotly_chart(fig_cm, use_container_width=True)
1072
-
1073
- # Classification Report
1074
- st.subheader("Classification Report")
1075
- report = metrics['classification_report']
1076
- report_df = pd.DataFrame(report).transpose()
1077
- st.dataframe(report_df)
1078
 
1079
- else:
1080
- st.metric("MSE", f"{metrics['mse']:.2f}")
1081
- st.metric("R2", f"{metrics['r2']:.2f}")
1082
-
1083
- # A
1084
-
1085
- # Additional model display code...
1086
 
 
1087
 
1088
  # Feature Importance
1089
  st.subheader("Feature Importance")
1090
  try:
1091
  fig_importance = px.bar(
1092
  x=importance,
1093
- y=column_order, #Use stored column order
1094
  orientation='h',
1095
  title="Feature Importance"
1096
  )
1097
  st.plotly_chart(fig_importance, use_container_width=True)
1098
  except Exception as e:
1099
- st.warning(f"Could not display feature importance: {e}")
1100
 
1101
  # Explainable AI (Placeholder)
1102
  st.subheader("Explainable AI (XAI)")
 
945
  elif app_mode == "Model Training":
946
  st.title("🤖 Intelligent Model Training")
947
 
948
+ # Universal check for all dependent pages
949
  if 'cleaned_data' not in st.session_state:
950
  st.warning("No cleaned data found! Please either:")
951
+
952
  col1, col2 = st.columns(2)
953
  with col1:
954
  if st.button("↩️ Go to Data Cleaning"):
955
  st.session_state.app_mode = "Data Cleaning"
956
  st.experimental_rerun()
957
+
958
  with col2:
959
+ uploaded_clean = st.file_uploader("📤 Or upload clean data", type=["csv", "xlsx"])
 
960
  if uploaded_clean:
961
  try:
962
  st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
 
964
  st.experimental_rerun()
965
  except Exception as e:
966
  st.error(f"Invalid file: {str(e)}")
967
+
968
  st.stop() # Halt execution until resolved
969
 
970
  # Only reaches here if cleaned_data exists
971
  df = st.session_state.cleaned_data.copy()
972
+
973
  # Model Setup
974
  col1, col2, col3 = st.columns(3)
975
  with col1:
976
  target = st.selectbox("Select Target Variable", df.columns)
977
+ problem_type = st.selectbox("Problem Type", ["Classification", "Regression", "Multiclass"]) # Added Multiclass
978
  with col2:
979
  available_features = df.columns.drop(target)
980
+ features = st.multiselect("Select Features", available_features, default=list(available_features)) # Select all as default
981
  with col3:
982
  test_size = st.slider("Test Size", 0.1, 0.5, 0.2)
983
 
 
987
  elif problem_type == "Classification":
988
  model_type = st.selectbox("Select Classification Model", ["Random Forest", "Gradient Boosting", "Neural Network"])
989
  elif problem_type == "Multiclass":
990
+ model_type = st.selectbox("Select Multiclass Model", ["Logistic Regression", "Support Vector Machine", "Random Forest"]) # Added SVM and Logistic Regression
991
  else:
992
+ model_type = None # handle this
993
 
994
  # Hyperparameter Configuration - Dynamic based on Model Type
995
  st.subheader("Hyperparameter Configuration")
996
  model_params = {}
997
 
998
+ if model_type == "Neural Network": # Add options for NN parameters
999
  hidden_layers = st.text_input("Hidden Layer Sizes (e.g., 50,50 for two layers of 50 neurons)", "50,50")
1000
  activation = st.selectbox("Activation Function", ["relu", "tanh", "logistic"])
1001
  alpha = st.number_input("L2 Regularization (Alpha)", value=0.0001)
1002
 
1003
+ # Process the hidden layers string to a tuple of ints
1004
  try:
1005
  hidden_layer_sizes = tuple(map(int, hidden_layers.split(',')))
1006
  model_params['hidden_layer_sizes'] = hidden_layer_sizes
1007
  except ValueError:
1008
+ st.error("Invalid format for Hidden Layer Sizes. Use comma-separated integers (e.g., 50,50)")
1009
 
1010
  model_params['activation'] = activation
1011
  model_params['alpha'] = alpha
 
1018
  model_params['n_estimators'] = n_estimators
1019
  model_params['learning_rate'] = learning_rate
1020
  model_params['max_depth'] = max_depth
1021
+
1022
  elif model_type == "Logistic Regression":
1023
  c_value = st.number_input("C (Regularization)", value=1.0)
1024
  model_params['C'] = c_value
 
1035
  model_params['n_estimators'] = n_estimators
1036
  model_params['max_depth'] = max_depth
1037
 
 
 
1038
  use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")
1039
 
1040
  # In Model Training section - Fix indentation for training logic
1041
+ if st.button("Train Model"):
1042
  if not features:
1043
  st.error("Please select at least one feature.")
1044
  st.stop()
 
1049
  df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search
1050
  )
1051
 
1052
+ if model: # Only proceed if training was successful
1053
+ st.success("Model trained successfully!")
1054
+
1055
+ # Display Metrics
1056
+ st.subheader("Model Evaluation Metrics")
1057
+ if problem_type in ["Classification", "Multiclass"]: # Combined here
1058
+ st.metric("Accuracy", f"{metrics['accuracy']:.2%}")
1059
+
1060
+ # Confusion Matrix Visualization
1061
+ st.subheader("Confusion Matrix")
1062
+ cm = metrics['confusion_matrix']
1063
+ class_names = [str(i) for i in np.unique(df[target])] # Get original class names
1064
+ fig_cm = px.imshow(cm,
1065
+ labels=dict(x="Predicted", y="Actual"),
1066
+ x=class_names,
1067
+ y=class_names,
1068
+ color_continuous_scale="Viridis")
1069
+ st.plotly_chart(fig_cm, use_container_width=True)
1070
+
1071
+ # Classification Report
1072
+ st.subheader("Classification Report")
1073
+ report = metrics['classification_report']
1074
+ report_df = pd.DataFrame(report).transpose()
1075
+ st.dataframe(report_df)
1076
 
1077
+ else:
1078
+ st.metric("MSE", f"{metrics['mse']:.2f}")
1079
+ st.metric("R2", f"{metrics['r2']:.2f}")
 
 
 
 
1080
 
1081
+ # Additional model display code...
1082
 
1083
  # Feature Importance
1084
  st.subheader("Feature Importance")
1085
  try:
1086
  fig_importance = px.bar(
1087
  x=importance,
1088
+ y=column_order, # Use stored column order
1089
  orientation='h',
1090
  title="Feature Importance"
1091
  )
1092
  st.plotly_chart(fig_importance, use_container_width=True)
1093
  except Exception as e:
1094
+ st.warning(f"Could not display feature importance: {e}")
1095
 
1096
  # Explainable AI (Placeholder)
1097
  st.subheader("Explainable AI (XAI)")