CosmickVisions committed (verified)
Commit c2f6adc · 1 parent: 3c337af

Update app.py

Files changed (1): app.py (+217 −200)
app.py CHANGED
@@ -23,6 +23,17 @@ from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
 import joblib # For saving and loading models
 import os # For file directory
+# Advanced
+from transformers import TFBertModel
+import tensorflow as tf
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, Conv2D, LSTM, Embedding, Dropout, Flatten, MaxPooling2D, BatchNormalization
+from tensorflow.keras.applications import MobileNetV2, ResNet50
+from tensorflow.keras.utils import plot_model
+from tensorflow.keras.callbacks import Callback
+import tf2onnx
+import onnx
+import tempfile # For the temporary files used by plot_model and the export section
 import shap
 from datetime import datetime
 from stqdm import stqdm
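
These new imports make TensorFlow, Transformers, and the ONNX toolchain hard requirements for the whole app. A gentler pattern (not in this commit; shown only as a sketch) is to guard the heavy imports so the non-deep-learning pages still load when the extras are missing:

    # Hypothetical guard, not part of this commit
    try:
        import tensorflow as tf
        from transformers import TFBertModel
        import tf2onnx
        import onnx
        DEEP_LEARNING_AVAILABLE = True
    except ImportError as exc:
        DEEP_LEARNING_AVAILABLE = False
        _dl_import_error = exc  # surfaced later via st.error on the Model Training page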
@@ -941,217 +951,224 @@ elif app_mode == "EDA":
     except Exception as e:
         st.error(f"Could not generate analysis report. Ensure pandas-profiling is installed correctly.")

-# Streamlit App
 elif app_mode == "Model Training":
-    st.title("🤖 Intelligent Model Training")
-
-    # Universal check for all dependent pages
-    if 'cleaned_data' not in st.session_state:
-        st.warning("No cleaned data found! Please either:")
-
         col1, col2 = st.columns(2)
         with col1:
-            if st.button("↩️ Go to Data Cleaning"):
-                st.session_state.app_mode = "Data Cleaning"
                 st.experimental_rerun()

         with col2:
-            uploaded_clean = st.file_uploader("📤 Or upload clean data", type=["csv", "xlsx"])
-            if uploaded_clean:
-                try:
-                    st.session_state.cleaned_data = pd.read_csv(uploaded_clean)
-                    st.success("Loaded clean data!")
-                    st.experimental_rerun()
-                except Exception as e:
-                    st.error(f"Invalid file: {str(e)}")
-
-        st.stop() # Halt execution until resolved
-
-    # Only reaches here if cleaned_data exists
-    df = st.session_state.cleaned_data.copy()

-    # Model Setup
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        target = st.selectbox("Select Target Variable", df.columns)
-        problem_type = st.selectbox("Problem Type", ["Classification", "Regression", "Multiclass"]) # Added Multiclass
-    with col2:
-        available_features = df.columns.drop(target)
-        features = st.multiselect("Select Features", available_features, default=list(available_features)) # Select all as default
-    with col3:
-        test_size = st.slider("Test Size", 0.1, 0.5, 0.2)
-
-    # Model Type Selection
-    if problem_type == "Regression":
-        model_type = st.selectbox("Select Regression Model", ["Random Forest", "Gradient Boosting", "Neural Network"])
-    elif problem_type == "Classification":
-        model_type = st.selectbox("Select Classification Model", ["Random Forest", "Gradient Boosting", "Neural Network"])
-    elif problem_type == "Multiclass":
-        model_type = st.selectbox("Select Multiclass Model", ["Logistic Regression", "Support Vector Machine", "Random Forest"]) # Added SVM and Logistic Regression
-    else:
-        model_type = None # handle this
-
-    # Hyperparameter Configuration - Dynamic based on Model Type
-    st.subheader("Hyperparameter Configuration")
-    model_params = {}
-
-    if model_type == "Neural Network": # Add options for NN parameters
-        hidden_layers = st.text_input("Hidden Layer Sizes (e.g., 50,50 for two layers of 50 neurons)", "50,50")
-        activation = st.selectbox("Activation Function", ["relu", "tanh", "logistic"])
-        alpha = st.number_input("L2 Regularization (Alpha)", value=0.0001)
-
-        # Process the hidden layers string to a tuple of ints
         try:
-            hidden_layer_sizes = tuple(map(int, hidden_layers.split(',')))
-            model_params['hidden_layer_sizes'] = hidden_layer_sizes
-        except ValueError:
-            st.error("Invalid format for Hidden Layer Sizes. Use comma-separated integers (e.g., 50,50)")
-
-        model_params['activation'] = activation
-        model_params['alpha'] = alpha
-
-    elif model_type == "Gradient Boosting":
-        n_estimators = st.slider("Number of Estimators", 50, 300, 100)
-        learning_rate = st.number_input("Learning Rate", value=0.1)
-        max_depth = st.slider("Max Depth", 2, 10, 3)
-
-        model_params['n_estimators'] = n_estimators
-        model_params['learning_rate'] = learning_rate
-        model_params['max_depth'] = max_depth
-
-    elif model_type == "Logistic Regression":
-        c_value = st.number_input("C (Regularization)", value=1.0)
-        model_params['C'] = c_value
-
-    elif model_type == "Support Vector Machine":
-        c_value = st.number_input("C (Regularization)", value=1.0)
-        kernel_type = st.selectbox("Kernel Type", ['rbf', 'linear', 'poly', 'sigmoid'])
-        model_params['C'] = c_value
-        model_params['kernel'] = kernel_type
-
-    elif model_type == "Random Forest":
-        n_estimators = st.slider("Number of Estimators", 50, 300, 100)
-        max_depth = st.slider("Max Depth", 2, 10, 3)
-        model_params['n_estimators'] = n_estimators
-        model_params['max_depth'] = max_depth
-
-    use_grid_search = st.checkbox("Use Grid Search for Hyperparameter Tuning")
-
-    # In Model Training section - Fix indentation for training logic
-    if st.button("Train Model"):
-        if not features:
-            st.error("Please select at least one feature.")
-            st.stop()
-
-        # Indent all this code under the button click
-        # Call the training function
-        model, scaler, label_encoder, imputer_numerical, metrics, column_order, importance, X_train, y_train = train_model(
-            df.copy(), target, features, problem_type, test_size, model_type, model_params, use_grid_search
-        )
-
-        if model: # Only proceed if training was successful
-            st.success("Model trained successfully!")
-
-            # Display Metrics
-            st.subheader("Model Evaluation Metrics")
-            if problem_type in ["Classification", "Multiclass"]: # Combined here
-                st.metric("Accuracy", f"{metrics['accuracy']:.2%}")
-
-                # Confusion Matrix Visualization
-                st.subheader("Confusion Matrix")
-                cm = metrics['confusion_matrix']
-                class_names = [str(i) for i in np.unique(df[target])] # Get original class names
-                fig_cm = px.imshow(cm,
-                                   labels=dict(x="Predicted", y="Actual"),
-                                   x=class_names,
-                                   y=class_names,
-                                   color_continuous_scale="Viridis")
-                st.plotly_chart(fig_cm, use_container_width=True)
-
-                # Classification Report
-                st.subheader("Classification Report")
-                report = metrics['classification_report']
-                report_df = pd.DataFrame(report).transpose()
-                st.dataframe(report_df)

             else:
-                st.metric("MSE", f"{metrics['mse']:.2f}")
-                st.metric("R2", f"{metrics['r2']:.2f}")
-
-            # Additional model display code...
-
-            # Feature Importance
-            st.subheader("Feature Importance")
-            try:
-                fig_importance = px.bar(
-                    x=importance,
-                    y=column_order, # Use stored column order
-                    orientation='h',
-                    title="Feature Importance"
                 )
-                st.plotly_chart(fig_importance, use_container_width=True)
-            except Exception as e:
-                st.warning(f"Could not display feature importance: {e}")
-
-            # Explainable AI (Placeholder)
-            st.subheader("Explainable AI (XAI)")
-            st.write("Future implementation will include model explanations using techniques like SHAP or LIME.") # To be implemented
-            if st.checkbox("Show a random model explanation (example)"): # Example of a feature, to be implemented
-                st.write("This feature is important because...")
-
-            # Save Model
-            st.subheader("Save Model")
-            model_name = st.text_input("Enter model name (without extension)", "my_model")
-            if st.button("Save Model"):
-                try:
-                    model_path = f"{model_name}.joblib"
-                    joblib.dump({
-                        'model': model,
-                        'scaler': scaler,
-                        'label_encoder': label_encoder,
-                        'imputer_numerical': imputer_numerical,
-                        'column_order': column_order,
-                        'features': features,
-                        'target': target,
-                        'problem_type': problem_type,
-                        'model_type': model_type,
-                        'model_params': model_params,
-                        'X_train': X_train, # Store X_train
-                        'y_train': y_train # Store y_train
-                    }, model_path)
-                    st.success(f"Model saved as {model_path}")
-                except Exception as e:
-                    st.error(f"Error saving model: {e}")
-
-    # Model Validation Section
-    st.header("Model Validation")
-    model_path_validate = st.text_input("Enter path to saved model for validation", "my_model.joblib")
-    if st.button("Validate Model"):
-        if not os.path.exists(model_path_validate):
-            st.error("Model file not found.")
-        else:
-            validation_metrics, problem_type = validate_model(model_path_validate, df.copy(), target, features, test_size) # Pass a copy of the dataframe
-            if validation_metrics:
-                st.subheader("Validation Metrics")
-                if problem_type in ["Classification", "Multiclass"]: # Combined here
-                    st.metric("Accuracy", f"{validation_metrics['accuracy']:.2%}")
-                    st.subheader("Confusion Matrix")
-                    cm = validation_metrics['confusion_matrix']
-                    class_names = [str(i) for i in np.unique(df[target])] # Get original class names
-                    fig_cm = px.imshow(cm,
-                                       labels=dict(x="Predicted", y="Actual"),
-                                       x=class_names,
-                                       y=class_names,
-                                       color_continuous_scale="Viridis")
-                    st.plotly_chart(fig_cm, use_container_width=True)
-                    st.subheader("Classification Report")
-                    report = validation_metrics['classification_report']
-                    report_df = pd.DataFrame(report).transpose()
-                    st.dataframe(report_df)
-
-                else:
-                    st.metric("MSE", f"{validation_metrics['mse']:.2f}")
-                    st.metric("R2", f"{validation_metrics['r2']:.2f}")

 # Predictions Section (Fixed)
 if app_mode == "Predictions":
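
The removed code above depends on a train_model() helper defined elsewhere in app.py that returns nine objects. For reference, a minimal scikit-learn sketch with a compatible signature might look like the following (only the Random Forest branch is shown; the real helper also handles Gradient Boosting, Neural Network, Logistic Regression, SVM, and grid search):

    # Hypothetical sketch; the real train_model() lives elsewhere in app.py
    from sklearn.model_selection import train_test_split
    from sklearn.impute import SimpleImputer
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
    from sklearn.metrics import (accuracy_score, confusion_matrix,
                                 classification_report, mean_squared_error, r2_score)

    def train_model(df, target, features, problem_type, test_size,
                    model_type, model_params, use_grid_search):
        X, y = df[features], df[target]

        # Impute and scale the (numeric) feature matrix
        imputer_numerical = SimpleImputer(strategy="median")
        scaler = StandardScaler()
        X = scaler.fit_transform(imputer_numerical.fit_transform(X))

        # Encode labels for classification problems
        label_encoder = None
        if problem_type in ("Classification", "Multiclass"):
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42)

        if problem_type == "Regression":
            model = RandomForestRegressor(**model_params)
        else:
            model = RandomForestClassifier(**model_params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        if problem_type == "Regression":
            metrics = {"mse": mean_squared_error(y_test, y_pred),
                       "r2": r2_score(y_test, y_pred)}
        else:
            metrics = {"accuracy": accuracy_score(y_test, y_pred),
                       "confusion_matrix": confusion_matrix(y_test, y_pred),
                       "classification_report": classification_report(
                           y_test, y_pred, output_dict=True)}

        importance = model.feature_importances_
        return (model, scaler, label_encoder, imputer_numerical, metrics,
                list(features), importance, X_train, y_train)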
 
 
@@ -941,217 +951,224 @@ elif app_mode == "EDA":
     except Exception as e:
         st.error(f"Could not generate analysis report. Ensure pandas-profiling is installed correctly.")

 elif app_mode == "Model Training":
+    st.title("🧠 Advanced Model Architect")
+
+    # ----- [1. Preset Selection] -----
+    with st.sidebar.expander("🚀 Quick Start", expanded=True):
+        presets = st.selectbox("Load Preset", [
+            "None",
+            "CNN-MNIST",
+            "LSTM-Text",
+            "ResNet-Lite",
+            "Transformer-NLP"
+        ])
+
+        # Only CNN-MNIST and LSTM-Text are populated so far; ResNet-Lite and
+        # Transformer-NLP are listed but have no layer preset yet
+        if presets != "None":
+            preset_layers = None
+            if presets == "CNN-MNIST":
+                preset_layers = [
+                    {"type": "Conv2D", "filters": 32, "kernel_size": 3},
+                    {"type": "MaxPooling2D", "pool_size": 2},
+                    {"type": "Flatten"},
+                    {"type": "Dense", "units": 10}
+                ]
+            elif presets == "LSTM-Text":
+                preset_layers = [
+                    {"type": "Embedding", "input_dim": 10000, "output_dim": 128},
+                    {"type": "LSTM", "units": 64},
+                    {"type": "Dense", "units": 1, "activation": "sigmoid"}
+                ]
+            # Rerun only when the architecture actually changed, so the
+            # selectbox keeping its value does not trigger an endless rerun loop
+            if preset_layers is not None and st.session_state.get("layers") != preset_layers:
+                st.session_state.layers = preset_layers
+                st.experimental_rerun()
+
+    # ----- [2. Base Model & Transfer Learning] -----
+    with st.expander("🏗️ Transfer Learning", expanded=False):
         col1, col2 = st.columns(2)
         with col1:
+            base_model = st.selectbox("Base Model", [
+                "None",
+                "MobileNetV2",
+                "ResNet50",
+                "BERT"
+            ])
+
+        with col2:
+            if base_model != "None":
+                freeze_layers = st.checkbox("Freeze Base Layers", True)
+                custom_input = st.checkbox("Custom Input Shape", False)
+
+                if base_model == "MobileNetV2":
+                    model = tf.keras.applications.MobileNetV2(
+                        include_top=False,
+                        weights='imagenet',
+                        input_shape=(224, 224, 3) if custom_input else None
+                    )
+                    st.info(f"Loaded {base_model} with {len(model.layers)} layers")
+
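
Note that the hunk above loads MobileNetV2 but never attaches it to the network assembled below, and freeze_layers is read without being applied. One plausible way to wire the two together (a sketch, not part of this commit) is a frozen base plus a small classification head:

    # Hypothetical glue code; build_transfer_model and n_classes are not in the commit
    def build_transfer_model(base, n_classes, freeze=True):
        base.trainable = not freeze  # honor the "Freeze Base Layers" checkbox
        return tf.keras.Sequential([
            base,
            tf.keras.layers.GlobalAveragePooling2D(),
            tf.keras.layers.Dense(n_classes, activation="softmax"),
        ])

    base = tf.keras.applications.MobileNetV2(
        include_top=False, weights="imagenet", input_shape=(224, 224, 3))
    model = build_transfer_model(base, n_classes=10, freeze=freeze_layers)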
+    # ----- [3. Layer Configuration] -----
+    st.subheader("🏗️ Network Architecture")
+
+    # Dynamic layer builder
+    layer_types = [
+        "Dense", "Conv2D", "LSTM",
+        "Dropout", "BatchNorm", "Flatten"
+    ]
+
+    if 'layers' not in st.session_state:
+        st.session_state.layers = []
+
+    for i, layer in enumerate(st.session_state.layers):
+        cols = st.columns([1, 3, 2])
+        with cols[0]:
+            st.markdown(f"**Layer {i+1}**")
+        with cols[1]:
+            st.code(f"{layer['type']}: {dict((k, v) for k, v in layer.items() if k != 'type')}")
+        with cols[2]:
+            if st.button(f"❌ Remove {i+1}", key=f"remove_{i}"):
+                del st.session_state.layers[i]
                 st.experimental_rerun()
+
+    # Add new layer controls
+    with st.expander("➕ Add New Layer", expanded=True):
+        new_layer_type = st.selectbox("Layer Type", layer_types)
+        new_layer_params = {}
+
+        if new_layer_type == "Dense":
+            new_layer_params["units"] = st.number_input("Units", 1, 1024, 128)
+            new_layer_params["activation"] = st.selectbox(
+                "Activation", ["relu", "sigmoid", "tanh"]
+            )
+
+        elif new_layer_type == "Conv2D":
+            new_layer_params["filters"] = st.number_input("Filters", 1, 256, 32)
+            new_layer_params["kernel_size"] = st.number_input("Kernel Size", 1, 9, 3)
+
+        if st.button("Add Layer"):
+            st.session_state.layers.append({
+                "type": new_layer_type,
+                **new_layer_params
+            })
+            st.experimental_rerun()
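
Each entry in st.session_state.layers stores the Keras class name under a "type" key alongside the constructor keyword arguments, so materializing a layer means splitting the two apart, mirroring the loop in the training step below. A minimal sketch of the round trip:

    # Hypothetical helper illustrating the layer-dict convention
    from tensorflow.keras.layers import Dense, Conv2D, LSTM, Dropout, Flatten, BatchNormalization

    LAYER_CLASSES = {"Dense": Dense, "Conv2D": Conv2D, "LSTM": LSTM,
                     "Dropout": Dropout, "Flatten": Flatten, "BatchNorm": BatchNormalization}

    def build_layer(spec):
        kwargs = {k: v for k, v in spec.items() if k != "type"}  # everything but the tag
        return LAYER_CLASSES[spec["type"]](**kwargs)

    layer = build_layer({"type": "Dense", "units": 128, "activation": "relu"})  # Dense(units=128, activation="relu")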
 
+    # ----- [4. Regularization & Advanced Options] -----
+    with st.expander("⚙️ Advanced Configuration", expanded=False):
+        col1, col2 = st.columns(2)
+
+        with col1:
+            st.subheader("Regularization")
+            l2_reg = st.number_input("L2 Regularization", 0.0, 0.1, 0.001)
+            dropout = st.number_input("Global Dropout", 0.0, 0.5, 0.2)
+            batch_norm = st.checkbox("Batch Normalization")
+
         with col2:
+            st.subheader("Optimization")
+            optimizer = st.selectbox("Optimizer", [
+                "adam", "sgd", "rmsprop",
+                "nadam", "adamax"
+            ])
+
+            loss = st.selectbox("Loss Function", [
+                "categorical_crossentropy",
+                "binary_crossentropy",
+                "mse",
+                "mae"
+            ])
+
+            metrics = st.multiselect("Metrics", [
+                "accuracy", "precision",
+                "recall", "auc"
+            ])
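
The loss picked here constrains how the training labels must be encoded, which matters for the fit() call below. As a quick reference (standard Keras semantics, not specific to this commit):

    # binary_crossentropy      -> y of shape (n, 1) with values in {0, 1}
    # categorical_crossentropy -> one-hot y of shape (n, n_classes)
    # mse / mae                -> continuous regression targets
    import tensorflow as tf
    y_onehot = tf.keras.utils.to_categorical([0, 2, 1], num_classes=3)  # for categorical_crossentropy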
 
+    # ----- [5. Training & Monitoring] -----
+    st.subheader("🎯 Training Configuration")
+
+    class LiveMetrics(Callback):
+        def on_epoch_end(self, epoch, logs=None):
+            if 'metrics' not in st.session_state:
+                st.session_state.metrics = []
+            st.session_state.metrics.append(logs)
+            self.update_chart()
+
+        def update_chart(self):
+            df = pd.DataFrame(st.session_state.metrics)
+            fig = px.line(df, y=['loss', 'val_loss'],
+                          title="Training Progress")
+            loss_chart.plotly_chart(fig)
+
+    if st.button("🚀 Start Training"):
         try:
+            model = tf.keras.Sequential()
+
+            # Map every layer type the builder and presets can produce to its Keras class
+            layer_classes = {
+                "Dense": Dense,
+                "Conv2D": Conv2D,
+                "LSTM": LSTM,
+                "Embedding": Embedding,
+                "MaxPooling2D": MaxPooling2D,
+                "Dropout": Dropout,
+                "BatchNorm": BatchNormalization,
+                "Flatten": Flatten
+            }
+
+            # Add layers with regularization
+            for layer in st.session_state.layers:
+                layer_class = layer_classes[layer['type']]
+                # Drop the 'type' tag so only real constructor kwargs reach Keras
+                params = {k: v for k, v in layer.items() if k != 'type'}
+
+                # Add L2 regularization (only layers with kernels accept it)
+                if l2_reg > 0 and layer['type'] in ("Dense", "Conv2D", "LSTM"):
+                    params['kernel_regularizer'] = tf.keras.regularizers.l2(l2_reg)
+
+                model.add(layer_class(**params))
+
+                # Add batch norm after each layer
+                if batch_norm:
+                    model.add(BatchNormalization())
+
+                # Add global dropout
+                model.add(Dropout(dropout))
+
+            model.compile(
+                optimizer=optimizer,
+                loss=loss,
+                metrics=metrics
+            )
+
+            # Show model summary
+            st.subheader("Model Architecture")
+            with tempfile.NamedTemporaryFile(suffix='.png') as tmp:
+                plot_model(model, to_file=tmp.name, show_shapes=True)
+                st.image(tmp.name)
+
+            # Start training (X_train/y_train/X_val/y_val are not defined in
+            # this hunk; they must be prepared earlier in the app)
+            st.subheader("Live Training Metrics")
+            loss_chart = st.empty()
+            model.fit(X_train, y_train,
+                      epochs=10,
+                      validation_data=(X_val, y_val),
+                      callbacks=[LiveMetrics()])
+
+        except Exception as e:
+            st.error(f"Training failed: {str(e)}")
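
The fit() call above references X_train, y_train, X_val, and y_val, which are never defined in this hunk; they must come from earlier in the app. A hypothetical preparation step from the cleaned DataFrame (the target name and split ratio are placeholders, not values from the commit):

    # Hypothetical data wiring for the fit() call above
    from sklearn.model_selection import train_test_split

    df = st.session_state.cleaned_data
    target = "label"  # placeholder; in practice this would come from a selectbox
    X = df.drop(columns=[target]).to_numpy(dtype="float32")
    y = df[target].to_numpy()
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42)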
 
+    # ----- [6. Export & Deployment] -----
+    st.subheader("💾 Export Model")
+
+    export_format = st.radio("Format", [
+        "TensorFlow SavedModel",
+        "HDF5",
+        "ONNX"
+    ])
+
+    if st.button("Export"):
+        # 'model' only exists on the run in which training just completed
+        with tempfile.NamedTemporaryFile(delete=False) as tmp:
+            if export_format == "HDF5":
+                export_path = tmp.name + '.h5'
+                model.save(export_path)
+            elif export_format == "ONNX":
+                model_proto, _ = tf2onnx.convert.from_keras(model)
+                export_path = tmp.name + '.onnx'
+                with open(export_path, 'wb') as f:
+                    f.write(model_proto.SerializeToString())
             else:
+                # SavedModel is a directory; zip it so one file can be downloaded
+                import shutil
+                tf.saved_model.save(model, tmp.name + '_saved')
+                export_path = shutil.make_archive(tmp.name, 'zip', tmp.name + '_saved')
+
+            # Read back the file that was actually written to disk
+            with open(export_path, 'rb') as f:
+                st.download_button(
+                    "Download Model",
+                    f.read(),
+                    file_name=f"model{os.path.splitext(export_path)[1]}"
                 )
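
The commit imports onnx but never calls it. A natural follow-up (a sketch, not in the commit) is validating the exported graph before offering the download:

    # Hypothetical validation step for the ONNX export path
    import onnx

    onnx_model = onnx.load(export_path)   # the '.onnx' file written above
    onnx.checker.check_model(onnx_model)  # raises if the graph is malformed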
 
 # Predictions Section (Fixed)
 if app_mode == "Predictions":