Spaces:

CosmickVisions
/

Neural-Vision

Sleeping

App Files Files Community

CosmickVisions committed on Mar 20

Commit

f0536a5

verified ·

1 Parent(s): f8de685

Update app.py

Browse files

Files changed (1) hide show

app.py +990 -128

app.py CHANGED Viewed

@@ -1,14 +1,17 @@
 import streamlit as st
 import pandas as pd
 import plotly.express as px
 import numpy as np
 from sklearn.model_selection import train_test_split
 from sklearn.neural_network import MLPClassifier, MLPRegressor
-from sklearn.cluster import KMeans
 from sklearn.metrics import accuracy_score, r2_score, silhouette_score, confusion_matrix, classification_report, mean_squared_error
 from sklearn.preprocessing import StandardScaler
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
 from groq import Groq
 from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -18,6 +21,11 @@ from langchain_community.tools.tavily_search import TavilySearchResults
 import os
 from dotenv import load_dotenv
 import tempfile
 # Load environment variables
 load_dotenv()
@@ -141,6 +149,103 @@ st.markdown("""
         transform: translateY(-2px);
         box-shadow: 0 6px 16px rgba(59, 130, 246, 0.4);
     }
     </style>
 """, unsafe_allow_html=True)
@@ -234,165 +339,922 @@ def plot_clusters(X, labels):
     fig = px.scatter(X, x=X.columns[0], y=X.columns[1], color=labels, title="Cluster Visualization")
     return fig
 # Pages
 def data_upload_page():
-    st.header("📤 Data Upload & Analysis")
-    uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
     if uploaded_file:
-        df = pd.read_csv(uploaded_file)
-        st.session_state.df = df
-        st.session_state.vector_store = create_vector_store(convert_df_to_text(df))
-        st.session_state.metrics = {}
-        st.subheader("Dataset Health Check")
-        col1, col2, col3 = st.columns(3)
-        col1.metric("Total Samples", df.shape[0])
-        col2.metric("Features", df.shape[1])
-        col3.metric("Missing Values", df.isna().sum().sum())
-        if st.button("Generate Full EDA Report"):
-            with st.spinner("Generating comprehensive analysis..."):
-                profile = ProfileReport(df, explorative=True)
-                st_profile_report(profile)
 def model_training_page():
-    st.header("🧠 Neural Network Training Studio")
     if 'df' not in st.session_state:
-        st.warning("Upload data first!")
         return
     df = st.session_state.df
-    problem_type = st.selectbox("Select Problem Type", ["Classification", "Regression", "Clustering"])
-    mode = st.selectbox("Domain Specialization", ["Legal", "Financial", "Academic", "Technical"])
-    if problem_type != "Clustering":
-        target = st.selectbox("Select Target Variable", df.columns)
-        X = df.drop(columns=[target])
-        y = df[target]
-    else:
-        X = df
-        y = None
-    if st.button("Train Neural Network"):
-        with st.spinner("Training in progress..."):
-            X_scaled = StandardScaler().fit_transform(X)
-            X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42) if y is not None else (X_scaled, None, None, None)
-            if problem_type == "Classification":
-                model = MLPClassifier(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
-                model.fit(X_train, y_train)
-                y_pred = model.predict(X_test)
-                st.session_state.metrics = {
-                    "Accuracy": accuracy_score(y_test, y_pred),
-                    "Classification Report": classification_report(y_test, y_pred, output_dict=True)
-                }
-            elif problem_type == "Regression":
-                model = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=500, random_state=42)
-                model.fit(X_train, y_train)
-                y_pred = model.predict(X_test)
-                st.session_state.metrics = {
-                    "R2 Score": r2_score(y_test, y_pred),
-                    "Mean Squared Error": mean_squared_error(y_test, y_pred)
-                }
-            else:  # Clustering
-                model = KMeans(n_clusters=3, random_state=42)
-                labels = model.fit_predict(X_scaled)
-                st.session_state.metrics = {
-                    "Silhouette Score": silhouette_score(X_scaled, labels)
-                }
-            st.session_state.best_model = model
-            st.session_state.X_test = X_test
-            st.session_state.y_test = y_test
-            st.session_state.y_pred = y_pred if y is not None else labels
-            st.session_state.problem_type = problem_type
-            st.success(f"Model trained successfully in {mode} mode!")
 def visualization_page():
-    st.header("🔍 Neural Network Evaluation Center")
     if 'best_model' not in st.session_state:
-        st.warning("Train a model first!")
         return
-    st.subheader("Performance Analysis")
-    if st.session_state.problem_type == "Classification":
-        st.plotly_chart(plot_confusion_matrix(st.session_state.y_test, st.session_state.y_pred))
-        st.plotly_chart(plot_feature_importance(st.session_state.best_model, pd.DataFrame(st.session_state.X_test, columns=st.session_state.df.columns[:-1])))
-    elif st.session_state.problem_type == "Regression":
-        st.plotly_chart(plot_residuals(st.session_state.y_test, st.session_state.y_pred))
-        st.plotly_chart(plot_feature_importance(st.session_state.best_model, pd.DataFrame(st.session_state.X_test, columns=st.session_state.df.columns[:-1])))
-    else:  # Clustering
-        st.plotly_chart(plot_clusters(pd.DataFrame(st.session_state.X_test, columns=st.session_state.df.columns), st.session_state.y_pred))
-    st.subheader("Metrics")
-    st.write(st.session_state.metrics)
-# Chatbot Interface
 def ai_assistant():
     st.markdown('<div class="chat-container">', unsafe_allow_html=True)
-    st.subheader("🧠 Neural Insight Assistant (RAG + Web Search)")
-    use_web_search = st.checkbox("Enable Tavily Web Search", value=False)
-    mode = st.selectbox("Domain Mode", ["Legal", "Financial", "Academic", "Technical"], key="chat_mode")
-    for msg in st.session_state.chat_history:
-        with st.chat_message(msg["role"]):
-            st.markdown(f'<div class="{msg["role"]}-message">{msg["content"]}</div>', unsafe_allow_html=True)
-    if prompt := st.chat_input("Ask about data, models, or web insights..."):
-        st.session_state.chat_history.append({"role": "user", "content": prompt})
-        with st.chat_message("user"):
-            st.markdown(f'<div class="user-message">{prompt}</div>', unsafe_allow_html=True)
-        with st.spinner("Processing..."):
-            response = get_groq_response(prompt, mode, use_web_search)
-            st.session_state.chat_history.append({"role": "assistant", "content": response})
-        with st.chat_message("assistant"):
-            st.markdown(f'<div class="bot-message">{response}</div>', unsafe_allow_html=True)
     st.markdown('</div>', unsafe_allow_html=True)
-# Main App Layout
-st.markdown("""
-    <div class="header">
-        <h1 class="header-title">Neural-Vision Enhanced</h1>
-        <div class="header-subtitle">Neural Network Development for Domain-Specialized Analysis</div>
-    </div>
-""", unsafe_allow_html=True)
-with st.sidebar:
-    st.title("🔮 Neural-Vision Enhanced")
-    page = st.selectbox("Navigation", [
-        "Data Upload & Analysis",
-        "Neural Network Training Studio",
-        "Neural Network Evaluation Center"
-    ])
-    st.session_state.active_page = page
-    st.markdown("---")
-    st.markdown("**Environment Setup**")
-    # Tavily API Key Input and Submit Button
-    tavily_api_input = st.text_input("Tavily API Key", type="password", help="Enter your Tavily API key for web search functionality")
-    if st.button("Submit API Key"):
-        if tavily_api_input:
-            st.session_state.tavily_api_key = tavily_api_input
-            st.success("Tavily API Key submitted successfully!")
-        else:
-            st.warning("Please enter a valid API key.")
-    st.markdown("---")
-    st.markdown("v5.0 | © 2025 Neural-Vision")
-# Page Routing
-if "Data Upload & Analysis" in page:
-    data_upload_page()
-elif "Neural Network Training Studio" in page:
-    model_training_page()
-else:
-    visualization_page()
-ai_assistant()

 import streamlit as st
 import pandas as pd
 import plotly.express as px
+import plotly.graph_objects as go
 import numpy as np
 from sklearn.model_selection import train_test_split
 from sklearn.neural_network import MLPClassifier, MLPRegressor
+from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
+from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor
 from sklearn.metrics import accuracy_score, r2_score, silhouette_score, confusion_matrix, classification_report, mean_squared_error
 from sklearn.preprocessing import StandardScaler
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
+from streamlit_lottie import st_lottie
 from groq import Groq
 from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import os
 from dotenv import load_dotenv
 import tempfile
+import datetime
+import time
+import matplotlib.pyplot as plt
+import shap
+import xgboost as xgb
 # Load environment variables
 load_dotenv()
         transform: translateY(-2px);
         box-shadow: 0 6px 16px rgba(59, 130, 246, 0.4);
     }
+    /* Card styles */
+    .card {
+        background: var(--white);
+        border-radius: 16px;
+        box-shadow: 0 4px 16px rgba(0,0,0,0.1);
+        padding: 20px;
+        margin-bottom: 25px;
+        transition: all 0.3s ease;
+    }
+    .card:hover {
+        box-shadow: 0 8px 24px rgba(0,0,0,0.15);
+        transform: translateY(-2px);
+    }
+    /* Step header styles */
+    .step-header {
+        display: flex;
+        align-items: center;
+        margin-bottom: 15px;
+    }
+    .step-counter {
+        background: var(--primary-blue);
+        color: var(--white);
+        width: 36px;
+        height: 36px;
+        border-radius: 50%;
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        font-weight: bold;
+        margin-right: 15px;
+        box-shadow: 0 4px 10px rgba(59, 130, 246, 0.3);
+    }
+    .step-title {
+        font-size: 1.5rem;
+        font-weight: 700;
+        color: var(--dark-blue);
+    }
+    /* Notification styles */
+    .notification {
+        display: flex;
+        align-items: center;
+        background: #ECFDF5;
+        border-left: 4px solid #059669;
+        color: #065F46;
+        padding: 15px;
+        border-radius: 8px;
+        margin: 15px 0;
+        box-shadow: 0 2px 8px rgba(5, 150, 105, 0.1);
+        transition: transform 0.2s ease;
+    }
+    .notification:hover {
+        transform: translateY(-2px);
+    }
+    .notification-icon {
+        background: #059669;
+        color: white;
+        width: 24px;
+        height: 24px;
+        border-radius: 50%;
+        display: flex;
+        align-items: center;
+        justify-content: center;
+        margin-right: 15px;
+        font-weight: bold;
+    }
+    /* Metrics card styles */
+    .metrics-card {
+        background: var(--white);
+        border-radius: 12px;
+        padding: 15px;
+        box-shadow: 0 2px 10px rgba(0,0,0,0.08);
+        text-align: center;
+        transition: all 0.3s ease;
+        height: 100%;
+        display: flex;
+        flex-direction: column;
+        justify-content: center;
+    }
+    .metrics-card:hover {
+        transform: translateY(-3px);
+        box-shadow: 0 6px 16px rgba(0,0,0,0.12);
+    }
+    .metrics-value {
+        font-size: 2rem;
+        font-weight: 700;
+        color: var(--primary-blue);
+        margin-bottom: 5px;
+    }
+    .metrics-label {
+        color: var(--medium-grey);
+        font-size: 0.9rem;
+        font-weight: 500;
+    }
     </style>
 """, unsafe_allow_html=True)
     fig = px.scatter(X, x=X.columns[0], y=X.columns[1], color=labels, title="Cluster Visualization")
     return fig
def plot_learning_curve(model, X, y, cv=5):
    """Plot training and cross-validation scores against training-set size.

    Args:
        model: Estimator handed to ``sklearn.model_selection.learning_curve``.
        X: Feature matrix; ``len(X)`` is used to express the training sizes
            as a percentage of the full dataset.
        y: Target values.
        cv: Cross-validation setting forwarded to ``learning_curve``.

    Returns:
        A ``plotly.graph_objects.Figure`` with one mean-score line per split
        (training / testing), each surrounded by a shaded band of +/- one
        standard deviation across the folds.
    """
    from sklearn.base import is_classifier
    from sklearn.model_selection import learning_curve

    # Pick the metric from the estimator type. The previous check,
    # ``hasattr(y, 'nunique')``, is true for every pandas Series and therefore
    # scored regressors with 'accuracy'.
    scoring = 'accuracy' if is_classifier(model) else 'r2'

    train_sizes, train_scores, test_scores = learning_curve(
        model, X, y, cv=cv, scoring=scoring,
        n_jobs=-1, train_sizes=np.linspace(0.1, 1.0, 10))

    # learning_curve returns absolute sample counts; convert to % of len(X).
    size_pct = train_sizes / len(X) * 100

    fig = go.Figure()
    series = (
        ('Training Score', train_scores, 'blue', 'rgba(0, 0, 255, 0.1)'),
        ('Testing Score', test_scores, 'red', 'rgba(255, 0, 0, 0.1)'),
    )
    for label, fold_scores, line_color, band_color in series:
        mean = np.mean(fold_scores, axis=1)
        std = np.std(fold_scores, axis=1)

        # Mean score line.
        fig.add_trace(go.Scatter(
            x=size_pct,
            y=mean,
            mode='lines+markers',
            name=label,
            line=dict(color=line_color, width=2),
            marker=dict(size=8)
        ))
        # Invisible upper bound; the lower bound below fills up to it.
        fig.add_trace(go.Scatter(
            x=size_pct,
            y=mean + std,
            mode='lines',
            line=dict(width=0),
            showlegend=False
        ))
        # 'tonexty' fills towards the previously added trace (the upper bound).
        fig.add_trace(go.Scatter(
            x=size_pct,
            y=mean - std,
            mode='lines',
            line=dict(width=0),
            fill='tonexty',
            fillcolor=band_color,
            showlegend=False
        ))

    fig.update_layout(
        title='Learning Curve',
        xaxis_title='Training Set Size (%)',
        yaxis_title='Score',
        hovermode='x unified',
        width=700,
        height=400,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
    )
    return fig
def plot_shap_summary(model, X):
    """Create a SHAP summary plot for model explainability.

    Args:
        model: Fitted model passed to ``shap.Explainer``.
        X: Samples to explain; also passed to ``shap.summary_plot`` as the
            feature data.

    Returns:
        The Matplotlib figure holding the summary plot, or ``None`` when any
        step fails (a Streamlit warning with the error is shown instead).
    """
    fig = None
    try:
        # shap.Explainer selects the algorithm itself, so no branching on the
        # model's capabilities is needed here.
        explainer = shap.Explainer(model)
        shap_values = explainer(X)

        fig = plt.figure(figsize=(10, 8))
        shap.summary_plot(shap_values, X, show=False)
        # Re-read the current figure in case SHAP switched figures.
        fig = plt.gcf()
        plt.tight_layout()
        return fig
    except Exception as e:
        # Best-effort feature: report the problem and release the figure so it
        # does not stay registered with pyplot for the life of the process.
        if fig is not None:
            plt.close(fig)
        st.warning(f"Could not generate SHAP plot: {e}")
        return None
+# Data Exploration and Insights Generation
def generate_data_insights(df):
    """Generate comprehensive insights about the dataset.

    Args:
        df: The ``pandas.DataFrame`` to analyse.

    Returns:
        A dict with these keys:

        * ``shape``: ``df.shape``.
        * ``missing_values``: total number of missing cells.
        * ``duplicate_rows``: number of duplicated rows.
        * ``numeric_columns`` / ``categorical_columns``: column-name lists.
        * ``datetime_columns``: non-numeric columns in which at least one
          value parses as a datetime.
        * ``skewed_columns``: ``(column, skew)`` for ``|skew| > 1``.
        * ``correlated_features``: ``(col_a, col_b, |corr|)`` for pairs with
          ``|corr| > 0.7``, strongest first (empty list when there are fewer
          than two numeric columns).
        * ``high_cardinality_features``: ``(column, n_unique)`` for
          categorical columns with more than 10 distinct values.
        * ``missing_patterns``: ``(column, percent_missing)`` for columns
          with any missing value.
        * ``outlier_columns``: ``(column, count, percent)`` using the
          1.5 * IQR rule.
    """
    import warnings

    insights = {}

    # Basic statistics
    insights['shape'] = df.shape
    insights['missing_values'] = df.isna().sum().sum()
    insights['duplicate_rows'] = df.duplicated().sum()

    # Column types
    numeric_columns = list(df.select_dtypes(include=['number']).columns)
    categorical_columns = list(df.select_dtypes(include=['object', 'category', 'bool']).columns)
    insights['numeric_columns'] = numeric_columns
    insights['categorical_columns'] = categorical_columns

    # Datetime detection. Numeric (and bool) columns are skipped because
    # pd.to_datetime happily interprets plain numbers as epoch offsets, which
    # would flag every numeric column as a datetime.
    insights['datetime_columns'] = []
    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]):
            continue
        try:
            with warnings.catch_warnings():
                # Silence the per-column "could not infer format" noise.
                warnings.simplefilter("ignore")
                parsed = pd.to_datetime(df[col], errors='coerce')
        except Exception:
            # Detection is best-effort: an unparseable column is simply not
            # a datetime column.
            continue
        if parsed.notna().any():
            insights['datetime_columns'].append(col)

    # Distribution statistics
    insights['skewed_columns'] = []
    for col in numeric_columns:
        skew = df[col].skew()
        if abs(skew) > 1.0:
            insights['skewed_columns'].append((col, skew))

    # Correlation analysis (key is always present so callers can rely on it)
    insights['correlated_features'] = []
    if len(numeric_columns) > 1:
        corr_matrix = df[numeric_columns].corr().abs()
        names = corr_matrix.columns
        # Lower triangle only, so each unordered pair is reported once.
        corr_pairs = [
            (names[i], names[j], corr_matrix.iloc[i, j])
            for i in range(len(names))
            for j in range(i)
            if corr_matrix.iloc[i, j] > 0.7  # Strong correlation threshold
        ]
        insights['correlated_features'] = sorted(corr_pairs, key=lambda x: x[2], reverse=True)

    # Categorical feature analysis
    insights['high_cardinality_features'] = []
    for col in categorical_columns:
        n_unique = df[col].nunique()
        if n_unique > 10:
            insights['high_cardinality_features'].append((col, n_unique))

    # Missing value patterns
    insights['missing_patterns'] = []
    for col in df.columns:
        missing_pct = df[col].isna().mean() * 100
        if missing_pct > 0:
            insights['missing_patterns'].append((col, missing_pct))

    # Outlier detection (1.5 * IQR fences)
    insights['outlier_columns'] = []
    for col in numeric_columns:
        q1 = df[col].quantile(0.25)
        q3 = df[col].quantile(0.75)
        iqr = q3 - q1
        outside = (df[col] < (q1 - 1.5 * iqr)) | (df[col] > (q3 + 1.5 * iqr))
        outliers_count = outside.sum()
        if outliers_count > 0:
            insights['outlier_columns'].append((col, outliers_count, outliers_count / len(df) * 100))

    return insights
+# Enhanced Model Selection and Training Functions
def get_model_options(problem_type):
    """Return the candidate estimators offered for ``problem_type``.

    The result maps a display name to a newly constructed estimator.
    "Classification" and "Regression" get their supervised model families;
    any other value is treated as clustering.
    """
    seed = 42

    if problem_type not in ("Classification", "Regression"):
        # Clustering
        clusterers = {}
        clusterers["K-Means"] = KMeans(random_state=seed)
        clusterers["DBSCAN"] = DBSCAN()
        clusterers["Agglomerative"] = AgglomerativeClustering()
        return clusterers

    candidates = {}
    if problem_type == "Classification":
        candidates["Neural Network"] = MLPClassifier(max_iter=1000, random_state=seed)
        candidates["Random Forest"] = RandomForestClassifier(random_state=seed)
        candidates["Gradient Boosting"] = GradientBoostingClassifier(random_state=seed)
        candidates["XGBoost"] = xgb.XGBClassifier(random_state=seed)
    else:
        candidates["Neural Network"] = MLPRegressor(max_iter=1000, random_state=seed)
        candidates["Random Forest"] = RandomForestRegressor(random_state=seed)
        candidates["Gradient Boosting"] = GradientBoostingRegressor(random_state=seed)
        candidates["XGBoost"] = xgb.XGBRegressor(random_state=seed)
    return candidates
def _basic_param_grid(model, problem_type):
    """Return the small hyperparameter grid for ``model``, or ``{}`` if none applies."""
    if problem_type in ["Classification", "Regression"]:
        if isinstance(model, (RandomForestClassifier, RandomForestRegressor)):
            return {
                'n_estimators': [100, 200],
                'max_depth': [None, 10, 20]
            }
        if isinstance(model, (GradientBoostingClassifier, GradientBoostingRegressor)):
            return {
                'n_estimators': [100, 200],
                'learning_rate': [0.01, 0.1]
            }
        if isinstance(model, (MLPClassifier, MLPRegressor)):
            return {
                'hidden_layer_sizes': [(100,), (100, 50)],
                'alpha': [0.0001, 0.001]
            }
        if "XGB" in str(model.__class__):
            return {
                'n_estimators': [100, 200],
                'learning_rate': [0.01, 0.1],
                'max_depth': [3, 6]
            }
    elif problem_type == "Clustering":
        if isinstance(model, KMeans):
            return {
                'n_clusters': [3, 4, 5, 6]
            }
        if isinstance(model, DBSCAN):
            return {
                'eps': [0.3, 0.5, 0.7],
                'min_samples': [5, 10, 15]
            }
        if isinstance(model, AgglomerativeClustering):
            return {
                'n_clusters': [3, 4, 5, 6],
                'linkage': ['ward', 'complete', 'average']
            }
    return {}


def _cluster_silhouette_scorer(estimator, X, y=None):
    """Score a clustering estimator on ``X`` by silhouette; ``-1`` if undefined.

    Uses the ``(estimator, X, y)`` signature that scikit-learn expects from a
    callable ``scoring`` argument.
    """
    if hasattr(estimator, 'predict'):
        labels = estimator.predict(X)
    else:
        # DBSCAN / AgglomerativeClustering cannot label unseen data.
        labels = estimator.fit_predict(X)
    n_labels = len(set(labels))
    # Silhouette is only defined for 2 <= n_labels <= n_samples - 1.
    if n_labels <= 1 or n_labels >= len(labels):
        return -1
    return silhouette_score(X, labels)


def train_model_with_optimization(model, X_train, X_test, y_train, y_test, problem_type, optimization_level="basic"):
    """Train ``model`` with optional hyperparameter optimization.

    Args:
        model: Estimator to fit.
        X_train: Training features.
        X_test: Features used to produce the returned predictions.
        y_train: Training targets (``None`` for clustering).
        y_test: Unused here; kept so callers can pass the full split.
        problem_type: "Classification", "Regression" or "Clustering".
        optimization_level: "none" fits the model as given. Any other value
            runs a small grid search when a grid is defined for the model
            type; "advanced" currently falls back to this basic grid.

    Returns:
        ``(best_model, y_pred, training_time)`` where ``training_time`` is the
        elapsed fitting time in seconds.
    """
    start_time = time.time()

    if optimization_level == "none":
        param_grid = {}
    else:
        # TODO: Implement advanced optimization with more parameters,
        # RandomizedSearchCV or BayesianOptimization. Until then "advanced"
        # uses the basic grid instead of leaving best_model undefined.
        param_grid = _basic_param_grid(model, problem_type)

    if param_grid:
        # Imported before both branches: importing inside only one of them
        # left the name unbound on the other path.
        from sklearn.model_selection import GridSearchCV

        if problem_type == "Clustering":
            # A callable scorer is passed directly; make_scorer is for
            # metric functions of the form f(y_true, y_pred).
            grid_search = GridSearchCV(
                estimator=model,
                param_grid=param_grid,
                scoring=_cluster_silhouette_scorer,
                cv=3,
                n_jobs=-1
            )
            grid_search.fit(X_train)
        else:
            scoring = 'accuracy' if problem_type == "Classification" else 'r2'
            grid_search = GridSearchCV(
                estimator=model,
                param_grid=param_grid,
                scoring=scoring,
                cv=5,
                n_jobs=-1
            )
            grid_search.fit(X_train, y_train)
        best_model = grid_search.best_estimator_
    else:
        # No optimization requested, or no grid defined for this model type.
        model.fit(X_train, y_train)
        best_model = model

    # Calculate training time
    training_time = time.time() - start_time

    # Get predictions for evaluation
    if problem_type == "Clustering" and not hasattr(best_model, 'predict'):
        # Clusterers without predict() must be refit to label X_test.
        y_pred = best_model.fit_predict(X_test)
    else:
        y_pred = best_model.predict(X_test)

    return best_model, y_pred, training_time
 # Pages
 def data_upload_page():
+    """Enhanced data upload & analysis page"""
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    # Create a header with animation
+    col1, col2 = st.columns([1, 3])
+    with col1:
+        st_lottie(lottie_upload, height=150, key="upload_animation")
+    with col2:
+        st.markdown('<div class="step-header">', unsafe_allow_html=True)
+        st.markdown('<div class="step-counter">1</div>', unsafe_allow_html=True)
+        st.markdown('<div class="step-title">Data Upload & Exploratory Analysis</div>', unsafe_allow_html=True)
+        st.markdown('</div>', unsafe_allow_html=True)
+        st.markdown("Upload your dataset and get comprehensive insights before model training.")
+    # File uploader with enhanced UI
+    uploaded_file = st.file_uploader("Upload Dataset (CSV, Excel, or JSON)",
+                                     type=["csv", "xlsx", "json"],
+                                     help="Upload your data file to start analysis")
     if uploaded_file:
+        # Provide feedback during loading
+        with st.spinner('Reading and analyzing your dataset...'):
+            # Determine file type and read
+            if uploaded_file.name.endswith('csv'):
+                df = pd.read_csv(uploaded_file)
+            elif uploaded_file.name.endswith('xlsx'):
+                df = pd.read_excel(uploaded_file)
+            elif uploaded_file.name.endswith('json'):
+                df = pd.read_json(uploaded_file)
+            # Store in session state
+            st.session_state.df = df
+            st.session_state.vector_store = create_vector_store(convert_df_to_text(df))
+            st.session_state.metrics = {}
+            # Generate insights
+            st.session_state.dataset_insights = generate_data_insights(df)
+        # Success notification
+        if 'notification' not in st.session_state or st.session_state.notification is None:
+            st.session_state.notification = "Data successfully loaded! 🎉"
+        # Display a notification
+        st.markdown(f"""
+        <div class="notification">
+            <div class="notification-icon">✓</div>
+            <div>{st.session_state.notification}</div>
+        </div>
+        """, unsafe_allow_html=True)
+        st.session_state.notification = None
+        # Create tabs for different data views
+        data_tabs = st.tabs(["📊 Overview", "🔍 Data Explorer", "📈 Visualizations", "📋 Profile Report"])
+        with data_tabs[0]:
+            st.subheader("Dataset Overview")
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                st.markdown('<div class="metrics-card">', unsafe_allow_html=True)
+                st.markdown(f'<div class="metrics-value">{df.shape[0]:,}</div>', unsafe_allow_html=True)
+                st.markdown('<div class="metrics-label">Total Samples</div>', unsafe_allow_html=True)
+                st.markdown('</div>', unsafe_allow_html=True)
+            with col2:
+                st.markdown('<div class="metrics-card">', unsafe_allow_html=True)
+                st.markdown(f'<div class="metrics-value">{df.shape[1]}</div>', unsafe_allow_html=True)
+                st.markdown('<div class="metrics-label">Features</div>', unsafe_allow_html=True)
+                st.markdown('</div>', unsafe_allow_html=True)
+            with col3:
+                missing_pct = df.isna().sum().sum() / (df.shape[0] * df.shape[1]) * 100
+                st.markdown('<div class="metrics-card">', unsafe_allow_html=True)
+                st.markdown(f'<div class="metrics-value">{missing_pct:.1f}%</div>', unsafe_allow_html=True)
+                st.markdown('<div class="metrics-label">Missing Values</div>', unsafe_allow_html=True)
+                st.markdown('</div>', unsafe_allow_html=True)
+            # Data Types Breakdown
+            st.subheader("Data Types")
+            dtype_counts = df.dtypes.value_counts().reset_index()
+            dtype_counts.columns = ['Data Type', 'Count']
+            fig = px.pie(dtype_counts, values='Count', names='Data Type', hole=0.4,
+                         color_discrete_sequence=px.colors.qualitative.Bold)
+            fig.update_layout(margin=dict(t=0, b=0, l=0, r=0), height=300)
+            st.plotly_chart(fig, use_container_width=True)
+            # Key Insights
+            st.subheader("Key Insights")
+            insights = st.session_state.dataset_insights
+            insight_cols = st.columns(2)
+            with insight_cols[0]:
+                st.markdown("**Data Quality Issues**")
+                if insights['missing_patterns']:
+                    st.markdown("🔹 **Missing Values:**")
+                    for col, pct in sorted(insights['missing_patterns'], key=lambda x: x[1], reverse=True)[:5]:
+                        st.markdown(f"  • *{col}*: {pct:.1f}% missing")
+                if insights['outlier_columns']:
+                    st.markdown("🔹 **Outliers Detected:**")
+                    for col, count, pct in sorted(insights['outlier_columns'], key=lambda x: x[2], reverse=True)[:5]:
+                        st.markdown(f"  • *{col}*: {count} outliers ({pct:.1f}%)")
+            with insight_cols[1]:
+                st.markdown("**Feature Relationships**")
+                if 'correlated_features' in insights and insights['correlated_features']:
+                    st.markdown("🔹 **Highly Correlated Features:**")
+                    for col1, col2, corr in insights['correlated_features'][:5]:
+                        st.markdown(f"  • *{col1}* & *{col2}*: {corr:.2f} correlation")
+                if insights['skewed_columns']:
+                    st.markdown("🔹 **Skewed Distributions:**")
+                    for col, skew in sorted(insights['skewed_columns'], key=lambda x: abs(x[1]), reverse=True)[:5]:
+                        direction = "right" if skew > 0 else "left"
+                        st.markdown(f"  • *{col}*: {direction}-skewed ({skew:.2f})")
+        with data_tabs[1]:
+            st.subheader("Interactive Data Explorer")
+            # Filter and search options
+            col1, col2 = st.columns([2, 3])
+            with col1:
+                search_term = st.text_input("Search columns", "")
+            with col2:
+                selected_dtypes = st.multiselect(
+                    "Filter by data type",
+                    options=['numeric', 'object', 'datetime', 'category', 'bool'],
+                    default=['numeric', 'object']
+                )
+            # Apply filters
+            filtered_cols = df.columns
+            if search_term:
+                filtered_cols = [col for col in filtered_cols if search_term.lower() in col.lower()]
+            if selected_dtypes:
+                dtype_map = {
+                    'numeric': 'number',
+                    'object': 'object',
+                    'datetime': 'datetime',
+                    'category': 'category',
+                    'bool': 'bool'
+                }
+                dtype_filtered = []
+                for dtype in selected_dtypes:
+                    dtype_filtered.extend(df.select_dtypes(include=[dtype_map[dtype]]).columns)
+                filtered_cols = [col for col in filtered_cols if col in dtype_filtered]
+            # Display filtered dataframe
+            if filtered_cols:
+                st.dataframe(df[filtered_cols], height=400)
+                # Column statistics
+                selected_column = st.selectbox("Select column for detailed statistics", options=filtered_cols)
+                col1, col2 = st.columns(2)
+                with col1:
+                    st.subheader(f"Statistics for: {selected_column}")
+                    if pd.api.types.is_numeric_dtype(df[selected_column]):
+                        stats = df[selected_column].describe()
+                        for stat, value in stats.items():
+                            st.markdown(f"**{stat}:** {value:.4f}")
+                        # Additional stats
+                        st.markdown(f"**Skewness:** {df[selected_column].skew():.4f}")
+                        st.markdown(f"**Kurtosis:** {df[selected_column].kurtosis():.4f}")
+                    else:
+                        st.markdown(f"**Unique Values:** {df[selected_column].nunique()}")
+                        st.markdown(f"**Most Common:** {df[selected_column].value_counts().index[0]}")
+                        st.markdown(f"**Least Common:** {df[selected_column].value_counts().index[-1]}")
+                        st.markdown(f"**Missing Values:** {df[selected_column].isna().sum()} ({df[selected_column].isna().mean()*100:.2f}%)")
+                with col2:
+                    st.subheader("Distribution")
+                    if pd.api.types.is_numeric_dtype(df[selected_column]):
+                        # Histogram for numeric
+                        fig = px.histogram(df, x=selected_column, histnorm='probability density',
+                                         marginal='box', color_discrete_sequence=['#3B82F6'])
+                        fig.update_layout(height=300, margin=dict(l=0, r=0, t=20, b=0))
+                    else:
+                        # Bar chart for categorical
+                        value_counts = df[selected_column].value_counts().reset_index()
+                        value_counts.columns = [selected_column, 'Count']
+                        value_counts = value_counts.head(15)  # Limit to top 15
+                        fig = px.bar(value_counts, x=selected_column, y='Count',
+                                   color_discrete_sequence=['#3B82F6'])
+                        fig.update_layout(height=300, margin=dict(l=0, r=0, t=20, b=0))
+                    st.plotly_chart(fig, use_container_width=True)
+            else:
+                st.warning("No columns match your filters.")
+        with data_tabs[2]:
+            st.subheader("Data Visualizations")
+            viz_type = st.selectbox(
+                "Select Visualization Type",
+                options=["Scatter Plot", "Correlation Matrix", "Pair Plot", "Box Plot", "Violin Plot", "Line Chart"]
+            )
+            if viz_type == "Scatter Plot":
+                col1, col2, col3 = st.columns(3)
+                with col1:
+                    x_col = st.selectbox("X-axis", options=df.select_dtypes(include=['number']).columns)
+                with col2:
+                    y_col = st.selectbox("Y-axis", options=df.select_dtypes(include=['number']).columns,
+                                        index=min(1, len(df.select_dtypes(include=['number']).columns)-1))
+                with col3:
+                    color_col = st.selectbox("Color by", options=["None"] + list(df.columns), index=0)
+                # Create plot
+                if color_col == "None":
+                    fig = px.scatter(df, x=x_col, y=y_col, title=f"{x_col} vs {y_col}",
+                                    opacity=0.7, color_discrete_sequence=['#3B82F6'])
+                else:
+                    fig = px.scatter(df, x=x_col, y=y_col, color=color_col, title=f"{x_col} vs {y_col} by {color_col}",
+                                    opacity=0.7)
+                fig.update_layout(height=500)
+                st.plotly_chart(fig, use_container_width=True)
+                # Add regression line option
+                if st.checkbox("Add regression line"):
+                    fig = px.scatter(df, x=x_col, y=y_col, trendline="ols",
+                                    title=f"{x_col} vs {y_col} with Regression Line",
+                                    opacity=0.7, color_discrete_sequence=['#3B82F6'])
+                    fig.update_layout(height=500)
+                    st.plotly_chart(fig, use_container_width=True)
+            elif viz_type == "Correlation Matrix":
+                # Select columns for correlation
+                numeric_cols = df.select_dtypes(include=['number']).columns
+                selected_corr_cols = st.multiselect(
+                    "Select columns for correlation matrix",
+                    options=numeric_cols,
+                    default=list(numeric_cols)[:min(8, len(numeric_cols))]
+                )
+                if selected_corr_cols:
+                    # Correlation matrix
+                    corr = df[selected_corr_cols].corr()
+                    mask = np.triu(np.ones_like(corr, dtype=bool))
+                    # Create plot
+                    fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r',
+                                  zmin=-1, zmax=1, aspect="auto")
+                    fig.update_layout(height=600)
+                    st.plotly_chart(fig, use_container_width=True)
+                else:
+                    st.warning("Please select at least one numeric column.")
+            elif viz_type == "Pair Plot":
+                # Select columns for pair plot
+                numeric_cols = df.select_dtypes(include=['number']).columns
+                selected_pair_cols = st.multiselect(
+                    "Select columns for pair plot (limit 4-5 for readability)",
+                    options=numeric_cols,
+                    default=list(numeric_cols)[:min(4, len(numeric_cols))]
+                )
+                if selected_pair_cols:
+                    if len(selected_pair_cols) > 6:
+                        st.warning("Too many columns may make the plot hard to read. Consider selecting 4-5 columns.")
+                    # Color option
+                    color_col = st.selectbox("Color by (categorical)",
+                                           options=["None"] + list(df.select_dtypes(exclude=['number']).columns),
+                                           index=0)
+                    # Create pair plot
+                    if color_col == "None":
+                        fig = px.scatter_matrix(df, dimensions=selected_pair_cols, opacity=0.7)
+                    else:
+                        fig = px.scatter_matrix(df, dimensions=selected_pair_cols, color=color_col, opacity=0.7)
+                    fig.update_layout(height=700)
+                    st.plotly_chart(fig, use_container_width=True)
+                else:
+                    st.warning("Please select at least one numeric column.")
+            elif viz_type == "Box Plot":
+                col1, col2 = st.columns(2)
+                with col1:
+                    y_col = st.selectbox("Value column", options=df.select_dtypes(include=['number']).columns)
+                with col2:
+                    x_col = st.selectbox("Category column",
+                                        options=["None"] + list(df.select_dtypes(exclude=['number']).columns),
+                                        index=0)
+                # Create plot
+                if x_col == "None":
+                    fig = px.box(df, y=y_col, title=f"Distribution of {y_col}",
+                               color_discrete_sequence=['#3B82F6'])
+                else:
+                    fig = px.box(df, x=x_col, y=y_col, title=f"Distribution of {y_col} by {x_col}")
+                fig.update_layout(height=500)
+                st.plotly_chart(fig, use_container_width=True)
+            elif viz_type == "Violin Plot":
+                col1, col2 = st.columns(2)
+                with col1:
+                    y_col = st.selectbox("Value column", options=df.select_dtypes(include=['number']).columns, key="violin_y")
+                with col2:
+                    x_col = st.selectbox("Category column",
+                                        options=["None"] + list(df.select_dtypes(exclude=['number']).columns),
+                                        index=0, key="violin_x")
+                # Create plot
+                if x_col == "None":
+                    fig = px.violin(df, y=y_col, box=True, title=f"Distribution of {y_col}",
+                                  color_discrete_sequence=['#3B82F6'])
+                else:
+                    fig = px.violin(df, x=x_col, y=y_col, box=True, title=f"Distribution of {y_col} by {x_col}")
+                fig.update_layout(height=500)
+                st.plotly_chart(fig, use_container_width=True)
+            elif viz_type == "Line Chart":
+                # Identify potential date columns
+                date_cols = []
+                for col in df.columns:
+                    try:
+                        if pd.to_datetime(df[col], errors='coerce').notna().all():
+                            date_cols.append(col)
+                    except:
+                        pass
+                if date_cols:
+                    col1, col2 = st.columns(2)
+                    with col1:
+                        x_col = st.selectbox("Time axis", options=date_cols)
+                        # Convert to datetime if not already
+                        df[x_col] = pd.to_datetime(df[x_col])
+                    with col2:
+                        y_cols = st.multiselect("Value columns", options=df.select_dtypes(include=['number']).columns,
+                                               default=[df.select_dtypes(include=['number']).columns[0]])
+                    if y_cols:
+                        # Create line chart
+                        fig = go.Figure()
+                        for y_col in y_cols:
+                            fig.add_trace(go.Scatter(x=df[x_col], y=df[y_col], mode='lines', name=y_col))
+                        fig.update_layout(
+                            title=f"Time Series Plot",
+                            xaxis_title=x_col,
+                            yaxis_title="Values",
+                            height=500,
+                            legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
+                        )
+                        st.plotly_chart(fig, use_container_width=True)
+                    else:
+                        st.warning("Please select at least one value column.")
+                else:
+                    st.warning("No datetime columns detected. Please ensure you have columns with date/time values.")
+        with data_tabs[3]:
+            st.subheader("Comprehensive Profiling Report")
+            profile_options = st.columns(3)
+            with profile_options[0]:
+                minimal = st.checkbox("Minimal Report (Faster)", value=True)
+            with profile_options[1]:
+                sample_data = st.checkbox("Use Sample (Faster for large datasets)", value=True)
+            with profile_options[2]:
+                report_percent = st.slider("Sample Size %", min_value=10, max_value=100, value=50, step=10)
+            if st.button("Generate Profile Report"):
+                with st.spinner("Generating comprehensive profile report..."):
+                    if sample_data and len(df) > 1000:
+                        profile_df = df.sample(int(len(df) * report_percent/100))
+                    else:
+                        profile_df = df
+                    if minimal:
+                        profile = ProfileReport(profile_df, minimal=True, title="Dataset Profile Report")
+                    else:
+                        profile = ProfileReport(profile_df, explorative=True, title="Dataset Profile Report")
+                    st_profile_report(profile)
+    st.markdown('</div>', unsafe_allow_html=True)
 def model_training_page():
+    """Render step 2 of the app: the model-training setup form.
+
+    Draws the page header and, once ``st.session_state`` holds a ``df``
+    entry, a four-tab workflow. Only the "Setup" tab is populated in this
+    function: problem type, domain, experiment name, target column and
+    feature selection. The columns to use are written to
+    ``st.session_state.selected_columns``.
+    """
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    # Header with animation
+    col1, col2 = st.columns([1, 3])
+    with col1:
+        # load_lottie_url() returns None when the download fails; skip the
+        # decorative animation rather than passing None to st_lottie.
+        if lottie_neural:
+            st_lottie(lottie_neural, height=150, key="neural_animation")
+    with col2:
+        st.markdown('<div class="step-header">', unsafe_allow_html=True)
+        st.markdown('<div class="step-counter">2</div>', unsafe_allow_html=True)
+        st.markdown('<div class="step-title">Neural Network Training Studio</div>', unsafe_allow_html=True)
+        st.markdown('</div>', unsafe_allow_html=True)
+        st.markdown("Train advanced models with automated optimization and hyperparameter tuning.")
     if 'df' not in st.session_state:
+        st.warning("Please upload data first!")
+        st.markdown('</div>', unsafe_allow_html=True)
         return
     df = st.session_state.df
+    # Create multiple tabs for the workflow
+    # NOTE(review): only the first tab receives content in this function.
+    train_tabs = st.tabs(["⚙️ Setup", "🔄 Preprocessing", "🧠 Training", "📊 Results"])
+    with train_tabs[0]:
+        st.subheader("Model Configuration")
+        # Problem type selection
+        problem_type = st.selectbox(
+            "Select Problem Type",
+            ["Classification", "Regression", "Clustering"],
+            help="Classification: predict categories, Regression: predict continuous values, Clustering: group similar data points"
+        )
+        # Domain specialization
+        domain_col1, domain_col2 = st.columns(2)
+        with domain_col1:
+            # NOTE(review): `mode` is not stored anywhere here, while
+            # ai_assistant() reads st.session_state.get('mode', 'General') --
+            # confirm whether this choice is meant to reach the assistant.
+            mode = st.selectbox(
+                "Domain Specialization",
+                ["General", "Legal", "Financial", "Medical", "Technical", "Academic"],
+                help="Optimize the model for your specific domain"
+            )
+        with domain_col2:
+            experiment_name = st.text_input(
+                "Experiment Name",
+                value=f"{problem_type}_{datetime.datetime.now().strftime('%Y%m%d_%H%M')}",
+                help="Name your experiment for reference"
+            )
+        # Target variable selection
+        if problem_type != "Clustering":
+            target_col1, target_col2 = st.columns(2)
+            with target_col1:
+                target = st.selectbox("Select Target Variable", df.columns)
+            with target_col2:
+                if problem_type == "Classification":
+                    st.info(f"Class Distribution: {df[target].value_counts().to_dict()}")
+                else:
+                    st.info(f"Target Range: {df[target].min()} to {df[target].max()}")
+            # Feature selection
+            st.subheader("Feature Selection")
+            select_features = st.checkbox("Select specific features", value=False)
+            if select_features:
+                available_features = [col for col in df.columns if col != target]
+                selected_features = st.multiselect(
+                    "Select features to include",
+                    options=available_features,
+                    default=available_features
+                )
+                st.session_state.selected_columns = selected_features + [target]
+            else:
+                st.session_state.selected_columns = df.columns.tolist()
+        else:
+            # For clustering there is no target, so all columns are features.
+            # (This branch previously had no statement, which made the whole
+            # module fail to parse.)
+            st.session_state.selected_columns = df.columns.tolist()
+    # Close the card wrapper on the normal path too; previously only the
+    # early return above closed it.
+    st.markdown('</div>', unsafe_allow_html=True)
 def visualization_page():
+    """Render step 3 of the app: the evaluation page for a trained model.
+
+    Draws the page header and, if ``st.session_state`` contains a
+    ``best_model`` entry, creates the three evaluation tabs. The tabs are
+    created but left empty in this function.
+    """
+    st.markdown('<div class="card">', unsafe_allow_html=True)
+    # Header with animation
+    col1, col2 = st.columns([1, 3])
+    with col1:
+        # load_lottie_url() returns None when the download fails; skip the
+        # decorative animation rather than passing None to st_lottie.
+        if lottie_visualization:
+            st_lottie(lottie_visualization, height=150, key="viz_animation")
+    with col2:
+        st.markdown('<div class="step-header">', unsafe_allow_html=True)
+        st.markdown('<div class="step-counter">3</div>', unsafe_allow_html=True)
+        st.markdown('<div class="step-title">Neural Network Evaluation Center</div>', unsafe_allow_html=True)
+        st.markdown('</div>', unsafe_allow_html=True)
+        st.markdown("Visualize, interpret, and validate your trained neural networks.")
     if 'best_model' not in st.session_state:
+        st.warning("Please train a model first!")
+        # Close the card wrapper before leaving early.
+        st.markdown('</div>', unsafe_allow_html=True)
         return
+    # Evaluation tabs for different analyses
+    eval_tabs = st.tabs(["📊 Model Performance", "🔍 Model Interpretation", "🧪 Test Predictions"])
+    # Tabs content would go here
+    st.markdown('</div>', unsafe_allow_html=True)
 def ai_assistant():
+    """Render the assistant panel and answer one question per button press.
+
+    Shows a question box, a web-search toggle and a submit button. On submit
+    the question is appended to ``st.session_state.chat_history``, the whole
+    history is rendered, and the reply from ``get_groq_response`` (or a
+    canned apology if that call raises) is appended and rendered as well.
+    Blank input produces a warning instead of a request.
+    """
+    import html
+    def render_message(role, content):
+        # Chat text comes from the user or the model and is placed inside raw
+        # HTML rendered with unsafe_allow_html, so escape it first.
+        css_class = "user-message" if role == "user" else "bot-message"
+        st.markdown(f'<div class="{css_class}">{html.escape(str(content))}</div>', unsafe_allow_html=True)
+    # Nothing in this function guarantees the history list already exists.
+    if 'chat_history' not in st.session_state:
+        st.session_state.chat_history = []
     st.markdown('<div class="chat-container">', unsafe_allow_html=True)
+    st.subheader("📚 Neural Network Development Assistant")
+    user_input = st.text_area("Ask a question about your data or neural network development:", "")
+    use_web_search = st.checkbox("Enable web search for up-to-date information", value=False)
+    if st.button("Get AI Guidance"):
+        if user_input.strip():
+            with st.spinner("Analyzing your question..."):
+                # Add user message to chat history, then show the conversation so far
+                st.session_state.chat_history.append({"role": "user", "content": user_input})
+                for msg in st.session_state.chat_history:
+                    render_message(msg["role"], msg["content"])
+                # Generate response
+                try:
+                    ai_response = get_groq_response(user_input, st.session_state.get('mode', 'General'), use_web_search)
+                except Exception as e:
+                    st.error(f"Error getting AI response: {str(e)}")
+                    # No alternative model is wired in; reply with a fixed
+                    # apology so the conversation still gets an answer.
+                    ai_response = "I'm sorry, I couldn't generate a proper response. Please try rephrasing your question."
+                st.session_state.chat_history.append({"role": "assistant", "content": ai_response})
+                render_message("assistant", ai_response)
+        else:
+            st.warning("Please enter a question first.")
     st.markdown('</div>', unsafe_allow_html=True)
+# Initialize additional session state variables.
+# 'notification' is seeded with None only when the key is absent, so a value
+# that is already present in the session state is left untouched.
+if 'notification' not in st.session_state:
+    st.session_state.notification = None
+# Import and initialize Lottie animations
+def load_lottie_url(url):
+    """Download a Lottie animation and return its decoded JSON.
+
+    Args:
+        url: Address of the Lottie JSON file.
+
+    Returns:
+        The parsed JSON payload, or ``None`` when the request fails, the
+        response status is not 200, or the body cannot be decoded as JSON.
+    """
+    # Any failure degrades to None instead of raising. ``except Exception``
+    # (unlike a bare ``except``) still lets KeyboardInterrupt and SystemExit
+    # propagate.
+    try:
+        import requests
+        # requests applies no timeout by default, so an unresponsive host
+        # could otherwise block the script indefinitely.
+        response = requests.get(url, timeout=10)
+        if response.status_code != 200:
+            return None
+        return response.json()
+    except Exception:
+        return None
+# Lottie animations: fetched in order (upload, neural, visualization); each
+# name is bound to whatever load_lottie_url returns for its URL.
+lottie_upload, lottie_neural, lottie_visualization = [
+    load_lottie_url(animation_url)
+    for animation_url in (
+        "https://assets9.lottiefiles.com/packages/lf20_grdj1jti.json",
+        "https://assets8.lottiefiles.com/private_files/lf30_8uvz2gcg.json",
+        "https://assets5.lottiefiles.com/packages/lf20_usmfx6bp.json",
+    )
+]
+# Main function to run the app
+def main():
+    """Lay out the app shell and dispatch to the selected page.
+
+    Renders the title banner, builds the sidebar (page navigation plus the
+    Tavily API key form), calls the renderer for the chosen page and finally
+    draws the AI assistant panel.
+    """
+    banner_html = """
+        <div class="header">
+            <h1 class="header-title">Neural-Vision Enhanced</h1>
+            <div class="header-subtitle">Neural Network Development for Domain-Specialized Analysis</div>
+        </div>
+    """
+    st.markdown(banner_html, unsafe_allow_html=True)
+    upload_label = "Data Upload & Analysis"
+    training_label = "Neural Network Training Studio"
+    evaluation_label = "Neural Network Evaluation Center"
+    with st.sidebar:
+        st.title("🔮 Neural-Vision Enhanced")
+        st.session_state.active_page = st.selectbox(
+            "Navigation", [upload_label, training_label, evaluation_label]
+        )
+        st.markdown("---")
+        st.markdown("**Environment Setup**")
+        # Tavily API key form: a non-empty key is stored in session state on submit.
+        key_entry = st.text_input("Tavily API Key", type="password", help="Enter your Tavily API key for web search functionality")
+        if st.button("Submit API Key"):
+            if not key_entry:
+                st.warning("Please enter a valid API key.")
+            else:
+                st.session_state.tavily_api_key = key_entry
+                st.success("Tavily API Key submitted successfully!")
+        st.markdown("---")
+        st.markdown("v5.0 | © 2025 Neural-Vision")
+    # Page routing: any label without an entry renders the evaluation page.
+    renderers = {upload_label: data_upload_page, training_label: model_training_page}
+    render_page = renderers.get(st.session_state.active_page, visualization_page)
+    render_page()
+    ai_assistant()
+# Entry point: call main() only when this module runs as the main script,
+# not when it is imported by another module.
+if __name__ == "__main__":
+    main()