Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -687,47 +687,7 @@ elif app_mode == "Advanced EDA":
|
|
687 |
st.write("**Sample Data**")
|
688 |
st.dataframe(df.head())
|
689 |
|
690 |
-
# Model
|
691 |
-
elif app_mode == "Model Training":
|
692 |
-
st.title("π Model Training Studio")
|
693 |
-
st.markdown("""
|
694 |
-
**Train and Evaluate Machine Learning Models** with advanced hyperparameter tuning and performance tracking.
|
695 |
-
Choose from a wide range of algorithms and configurations.
|
696 |
-
""")
|
697 |
-
|
698 |
-
# Initialize session state variables
|
699 |
-
if 'model' not in st.session_state:
|
700 |
-
st.session_state.model = None
|
701 |
-
if 'preprocessor' not in st.session_state:
|
702 |
-
st.session_state.preprocessor = None
|
703 |
-
if 'X_train_selected' not in st.session_state:
|
704 |
-
st.session_state.X_train_selected = None
|
705 |
-
if 'X_test_selected' not in st.session_state:
|
706 |
-
st.session_state.X_test_selected = None
|
707 |
-
if 'y_train' not in st.session_state:
|
708 |
-
st.session_state.y_train = None
|
709 |
-
if 'y_test' not in st.session_state:
|
710 |
-
st.session_state.y_test = None
|
711 |
-
|
712 |
-
df = st.session_state.cleaned_data.copy()
|
713 |
-
|
714 |
-
# Target Variable Selection
|
715 |
-
st.subheader("🎯 Target Variable")
|
716 |
-
target_column = st.selectbox("Select Target Variable", df.columns, help="Choose the column to predict.")
|
717 |
-
|
718 |
-
# Problem Type Selection
|
719 |
-
st.subheader("π Problem Type")
|
720 |
-
problem_type = st.radio("Select Problem Type", ["Regression", "Classification"], help="Choose the type of machine learning problem.")
|
721 |
-
|
722 |
-
# Feature Selection
|
723 |
-
st.subheader("🔧 Feature Selection")
|
724 |
-
use_all_features = st.checkbox("Use All Features", value=True, help="Select to use all features for training. Deselect to manually choose features.")
|
725 |
-
if use_all_features:
|
726 |
-
feature_columns = df.drop(columns=[target_column]).columns.tolist()
|
727 |
-
else:
|
728 |
-
feature_columns = st.multiselect("Select Feature Columns", df.drop(columns=[target_column]).columns, help="Choose the features you want to use for prediction.")
|
729 |
-
|
730 |
-
# Model Selection
|
731 |
st.subheader("🤖 Model Selection")
|
732 |
if problem_type == "Regression":
|
733 |
model_options = ["Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Neural Network"]
|
@@ -735,23 +695,44 @@ elif app_mode == "Model Training":
|
|
735 |
model_options = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Neural Network", "KNN", "Naive Bayes"]
|
736 |
model_name = st.selectbox("Select Model", model_options, help="Choose a model.")
|
737 |
|
738 |
-
|
739 |
-
|
740 |
-
|
741 |
-
|
742 |
-
|
743 |
-
|
744 |
-
|
745 |
-
|
746 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
747 |
elif model_name == "Neural Network":
|
|
|
|
|
|
|
|
|
748 |
hidden_layers = st.slider("Number of Hidden Layers", 1, 5, 2)
|
749 |
neurons_per_layer = st.slider("Neurons per Layer", 10, 200, 50)
|
750 |
-
activation = st.selectbox("Activation Function",
|
|
|
751 |
dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2)
|
752 |
-
initializer = st.selectbox("Weight Initializer",
|
|
|
753 |
learning_rate = st.slider("Learning Rate", 0.0001, 0.1, 0.001, format="%.4f")
|
754 |
-
optimizer_choice = st.selectbox("Optimizer",
|
|
|
755 |
batch_norm = st.checkbox("Batch Normalization", value=True)
|
756 |
regularization = st.checkbox("L2 Regularization")
|
757 |
epochs = st.slider("Epochs", 10, 200, 50)
|
@@ -769,9 +750,8 @@ elif app_mode == "Model Training":
|
|
769 |
'epochs': epochs,
|
770 |
'batch_size': batch_size
|
771 |
}
|
772 |
-
|
773 |
-
|
774 |
-
hyperparams = {}
|
775 |
|
776 |
# Train-Test Split
|
777 |
st.subheader("✂️ Train-Test Split")
|
|
|
687 |
st.write("**Sample Data**")
|
688 |
st.dataframe(df.head())
|
689 |
|
690 |
+
# Model Selection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
691 |
st.subheader("🤖 Model Selection")
|
692 |
if problem_type == "Regression":
|
693 |
model_options = ["Linear Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Neural Network"]
|
|
|
695 |
model_options = ["Logistic Regression", "Decision Tree", "Random Forest", "Gradient Boosting", "SVM", "Neural Network", "KNN", "Naive Bayes"]
|
696 |
model_name = st.selectbox("Select Model", model_options, help="Choose a model.")
|
697 |
|
698 |
+
# Hyperparameter Tuning
|
699 |
+
st.subheader("🎛️ Hyperparameter Tuning")
|
700 |
+
with st.expander("Configure Hyperparameters", expanded=True):
|
701 |
+
if model_name == "Random Forest":
|
702 |
+
n_estimators = st.slider("Number of Estimators", 10, 200, 100)
|
703 |
+
max_depth = st.slider("Max Depth", 3, 20, 10)
|
704 |
+
min_samples_split = st.slider("Min Samples Split", 2, 10, 2)
|
705 |
+
min_samples_leaf = st.slider("Min Samples Leaf", 1, 10, 1)
|
706 |
+
hyperparams = {
|
707 |
+
'n_estimators': n_estimators,
|
708 |
+
'max_depth': max_depth,
|
709 |
+
'min_samples_split': min_samples_split,
|
710 |
+
'min_samples_leaf': min_samples_leaf
|
711 |
+
}
|
712 |
+
elif model_name == "Gradient Boosting": # Correct placement of elif
|
713 |
+
learning_rate = st.slider("Learning Rate", 0.01, 1.0, 0.1)
|
714 |
+
n_estimators = st.slider("Number of Estimators", 10, 200, 100)
|
715 |
+
max_depth = st.slider("Max Depth", 3, 20, 10)
|
716 |
+
hyperparams = {
|
717 |
+
'learning_rate': learning_rate,
|
718 |
+
'n_estimators': n_estimators,
|
719 |
+
'max_depth': max_depth
|
720 |
+
}
|
721 |
elif model_name == "Neural Network":
|
722 |
+
from tensorflow.keras.models import Sequential
|
723 |
+
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
|
724 |
+
from tensorflow.keras.optimizers import Adam, Nadam, RMSprop, SGD
|
725 |
+
|
726 |
hidden_layers = st.slider("Number of Hidden Layers", 1, 5, 2)
|
727 |
neurons_per_layer = st.slider("Neurons per Layer", 10, 200, 50)
|
728 |
+
activation = st.selectbox("Activation Function",
|
729 |
+
["relu", "tanh", "sigmoid", "selu", "swish"])
|
730 |
dropout_rate = st.slider("Dropout Rate", 0.0, 0.5, 0.2)
|
731 |
+
initializer = st.selectbox("Weight Initializer",
|
732 |
+
["glorot_uniform", "he_normal", "lecun_uniform"])
|
733 |
learning_rate = st.slider("Learning Rate", 0.0001, 0.1, 0.001, format="%.4f")
|
734 |
+
optimizer_choice = st.selectbox("Optimizer",
|
735 |
+
["Adam", "Nadam", "RMSprop", "SGD"])
|
736 |
batch_norm = st.checkbox("Batch Normalization", value=True)
|
737 |
regularization = st.checkbox("L2 Regularization")
|
738 |
epochs = st.slider("Epochs", 10, 200, 50)
|
|
|
750 |
'epochs': epochs,
|
751 |
'batch_size': batch_size
|
752 |
}
|
753 |
+
else:
|
754 |
+
hyperparams = {}
|
|
|
755 |
|
756 |
# Train-Test Split
|
757 |
st.subheader("✂️ Train-Test Split")
|