Spaces:

CosmickVisions
/

Data-Vision

Sleeping

App Files Community

CosmickVisions committed on Mar 16

Commit

e5613af

verified ·

1 Parent(s): 8db18fb

Update app.py

Browse files

Files changed (1) hide show

app.py +201 -523

app.py CHANGED Viewed

@@ -1,35 +1,30 @@
 import streamlit as st
 import pandas as pd
-import numpy as np
 import plotly.express as px
-import plotly.graph_objects as go
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
-import os
-from dotenv import load_dotenv
 from groq import Groq
 from langchain_community.vectorstores import FAISS
-from langchain_community.document_loaders import TextLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
-import re
-from scipy import stats
-from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
 import tempfile
-# Set page config as the first Streamlit command
-st.set_page_config(page_title="Data-Vision Pro", layout="wide")
-# Load environment variables
-load_dotenv()
-# Initialize Groq client
 client = Groq(api_key=os.getenv("GROQ_API_KEY"))
-# Initialize HuggingFace embeddings for FAISS
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# Custom CSS with Silver, Blue, and Gold Theme + Responsiveness
 st.markdown("""
     <style>
     :root {
@@ -41,7 +36,7 @@ st.markdown("""
     .stApp {
         background-color: var(--silver);
         font-family: 'Inter', sans-serif;
-        max-width: 900px;
         margin: 0 auto;
         padding: 10px;
     }
@@ -50,69 +45,71 @@ st.markdown("""
         color: white;
         padding: 15px;
         border-radius: 5px;
-        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
         text-align: center;
     }
     .header-title {
-        font-size: 1.5rem;
         font-weight: 700;
         margin: 0;
     }
     .header-subtitle {
-        font-size: 0.9rem;
         margin-top: 5px;
     }
-    .sidebar .sidebar-content {
         background-color: white;
         border-radius: 5px;
         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
         padding: 15px;
     }
-    .chat-container {
         background-color: white;
         border-radius: 5px;
         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
         padding: 15px;
         margin-top: 20px;
     }
     .user-message {
         background-color: var(--blue);
         color: white;
-        border-radius: 18px 18px 4px 18px;
-        padding: 12px 16px;
-        margin-left: auto;
         max-width: 80%;
         margin-bottom: 10px;
     }
     .bot-message {
         background-color: #F0F0F0;
         color: var(--text-color);
-        border-radius: 18px 18px 18px 4px;
-        padding: 12px 16px;
-        margin-right: auto;
         max-width: 80%;
         margin-bottom: 10px;
     }
-    .footer {
-        text-align: center;
-        margin-top: 20px;
-        color: var(--text-color);
-        font-size: 0.8rem;
-    }
-    .tech-badge {
-        display: inline-block;
-        background-color: #E6ECEF;
-        color: var(--blue);
-        padding: 4px 8px;
-        border-radius: 12px;
-        font-size: 0.7rem;
-        margin: 0 4px;
-    }
-    h2 {
-        color: var(--blue);
-        border-bottom: 2px solid var(--gold);
-        padding-bottom: 5px;
-    }
     .stButton > button {
         background-color: var(--gold);
         color: white;
@@ -126,48 +123,76 @@ st.markdown("""
     }
     @media (max-width: 768px) {
         .header-title {
-            font-size: 1.2rem;
         }
         .header-subtitle {
-            font-size: 0.8rem;
         }
-        .chat-container, .sidebar .sidebar-content {
             padding: 10px;
         }
         .stApp {
             padding: 5px;
         }
-        h2 {
-            font-size: 1.2rem;
-        }
     }
-    </style>
 """, unsafe_allow_html=True)
-# Helper Functions (unchanged)
-def enhance_section_title(title):
-    st.markdown(f"<h2 style='border-bottom: 2px solid var(--gold); padding-bottom: 5px; color: var(--blue);'>{title}</h2>", unsafe_allow_html=True)
-def update_cleaned_data(df):
-    st.session_state.cleaned_data = df
-    if 'data_versions' not in st.session_state:
-        st.session_state.data_versions = [st.session_state.raw_data.copy()]
-    st.session_state.data_versions.append(df.copy())
-    st.session_state.dataset_text = convert_df_to_text(df)
-    st.success("✅ Action completed successfully!")
-    st.rerun()
 def convert_df_to_text(df):
     text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
     text += f"Missing Values: {df.isna().sum().sum()}\n"
-    text += "Columns:\n"
     for col in df.columns:
-        text += f"- {col} ({df[col].dtype}): "
-        if pd.api.types.is_numeric_dtype(df[col]):
-            text += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
-        else:
-            text += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
-        text += f", Missing={df[col].isna().sum()}\n"
     return text
 def create_vector_store(df_text):
@@ -176,469 +201,122 @@ def create_vector_store(df_text):
         temp_path = temp_file.name
     loader = TextLoader(temp_path)
     documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-    texts = text_splitter.split_documents(documents)
     vector_store = FAISS.from_documents(texts, embeddings)
     os.unlink(temp_path)
     return vector_store
-def update_vector_store_with_plot(plot_text, existing_vector_store):
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
-        temp_file.write(plot_text)
-        temp_path = temp_file.name
-    loader = TextLoader(temp_path)
-    documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-    texts = text_splitter.split_documents(documents)
-    if existing_vector_store:
-        existing_vector_store.add_documents(texts)
-    else:
-        existing_vector_store = FAISS.from_documents(texts, embeddings)
-    os.unlink(temp_path)
-    return existing_vector_store
-def extract_plot_data(plot_info, df):
-    plot_type = plot_info["type"]
-    x_col = plot_info["x"]
-    y_col = plot_info["y"] if "y" in plot_info else None
-    data = pd.read_json(plot_info["data"])
-    plot_text = f"Plot Type: {plot_type}\n"
-    plot_text += f"X-Axis: {x_col}\n"
-    if y_col:
-        plot_text += f"Y-Axis: {y_col}\n"
-    if plot_type == "Scatter Plot" and y_col:
-        correlation = data[x_col].corr(data[y_col])
-        slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col].dropna(), data[y_col].dropna())
-        plot_text += f"Correlation: {correlation:.2f}\n"
-        plot_text += f"Linear Regression: Slope={slope:.2f}, Intercept={intercept:.2f}, R²={r_value**2:.2f}, p-value={p_value:.4f}\n"
-        plot_text += f"X Stats: Mean={data[x_col].mean():.2f}, Std={data[x_col].std():.2f}, Min={data[x_col].min():.2f}, Max={data[x_col].max():.2f}\n"
-        plot_text += f"Y Stats: Mean={data[y_col].mean():.2f}, Std={data[y_col].std():.2f}, Min={data[y_col].min():.2f}, Max={data[y_col].max():.2f}\n"
-    elif plot_type == "Histogram":
-        plot_text += f"Stats: Mean={data[x_col].mean():.2f}, Median={data[x_col].median():.2f}, Std={data[x_col].std():.2f}\n"
-        plot_text += f"Skewness: {data[x_col].skew():.2f}\n"
-        plot_text += f"Range: [{data[x_col].min():.2f}, {data[x_col].max():.2f}]\n"
-    elif plot_type == "Box Plot" and y_col:
-        q1, q3 = data[y_col].quantile(0.25), data[y_col].quantile(0.75)
-        iqr = q3 - q1
-        plot_text += f"Y Stats: Median={data[y_col].median():.2f}, Q1={q1:.2f}, Q3={q3:.2f}, IQR={iqr:.2f}\n"
-        plot_text += f"Outliers: {len(data[y_col][(data[y_col] < q1 - 1.5 * iqr) | (data[y_col] > q3 + 1.5 * iqr)])} potential outliers\n"
-    elif plot_type == "Line Chart" and y_col:
-        plot_text += f"Y Stats: Mean={data[y_col].mean():.2f}, Std={data[y_col].std():.2f}, Trend={'increasing' if data[y_col].iloc[-1] > data[y_col].iloc[0] else 'decreasing'}\n"
-    elif plot_type == "Bar Chart":
-        plot_text += f"Counts: {data[x_col].value_counts().to_dict()}\n"
-    elif plot_type == "Correlation Matrix":
-        corr = data.corr()
-        plot_text += "Correlation Matrix:\n"
-        for col1 in corr.columns:
-            for col2 in corr.index:
-                if col1 < col2:
-                    plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
-    return plot_text
-def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
-    system_prompt = (
-        "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
-        f"The user is on the '{app_mode}' page:\n"
-        "- **Data Upload**: Upload CSV/XLSX files, view stats, or generate reports.\n"
-        "- **Data Cleaning**: Clean data (e.g., handle missing values, encode variables).\n"
-        "- **EDA**: Visualize data (e.g., scatter plots, histograms) and analyze plots.\n"
-        "When analyzing plots, provide detailed insights based on numerical data extracted from them."
-    )
     context = ""
-    if vector_store:
-        docs = vector_store.similarity_search(user_input, k=3)
-        if docs:
-            context = "\n\nDataset and Plot Context:\n" + "\n".join([f"- {doc.page_content}" for doc in docs])
-            system_prompt += f"Use this dataset and plot context to augment your response:\n{context}"
-    else:
-        system_prompt += "No dataset or plot data is loaded. Assist based on app functionality."
     try:
         response = client.chat.completions.create(
-            model=model,
             messages=[
-                {"role": "system", "content": system_prompt},
-                {"role": "user", "content": user_input}
-            ],
-            temperature=0.7,
-            max_tokens=1024
-        )
-        return response.choices[0].message.content
     except Exception as e:
         return f"Error: {str(e)}"
-# Command Functions
-def drop_columns(columns):
-    if 'cleaned_data' in st.session_state:
-        df = st.session_state.cleaned_data.copy()
-        columns_to_drop = [col.strip() for col in columns.split(',')]
-        valid_columns = [col for col in columns_to_drop if col in df.columns]
-        if valid_columns:
-            df.drop(valid_columns, axis=1, inplace=True)
-            update_cleaned_data(df)
-            return f"Dropped columns: {', '.join(valid_columns)}"
-        else:
-            return "No valid columns found to drop."
-    return "No dataset loaded."
-def generate_scatter_plot(params):
-    df = st.session_state.cleaned_data
-    match = re.search(r"([\w\s]+)\s+vs\s+([\w\s]+)", params)
-    if match and len(match.groups()) >= 2:
-        x_axis, y_axis = match.group(1).strip(), match.group(2).strip()
-        if x_axis in df.columns and y_axis in df.columns:
-            fig = px.scatter(df, x=x_axis, y=y_axis, title=f'Scatter Plot of {x_axis} vs {y_axis}')
-            st.plotly_chart(fig)
-            st.session_state.last_plot = {"type": "Scatter Plot", "x": x_axis, "y": y_axis, "data": df[[x_axis, y_axis]].to_json()}
-            return f"Generated scatter plot of {x_axis} vs {y_axis}"
-    return "Invalid columns for scatter plot."
-def generate_histogram(params):
-    df = st.session_state.cleaned_data
-    x_axis = params.strip()
-    if x_axis in df.columns:
-        fig = px.histogram(df, x=x_axis, title=f'Histogram of {x_axis}')
-        st.plotly_chart(fig)
-        st.session_state.last_plot = {"type": "Histogram", "x": x_axis, "data": df[[x_axis]].to_json()}
-        return f"Generated histogram of {x_axis}"
-    return "Invalid column for histogram."
-def analyze_plot():
-    if "last_plot" not in st.session_state:
-        return "No plot available to analyze."
-    plot_info = st.session_state.last_plot
-    df = pd.read_json(plot_info["data"])
-    plot_text = extract_plot_data(plot_info, df)
-    return f"Analysis of the last plot:\n{plot_text}"
-def parse_command(command):
-    command = command.lower().strip()
-    if "drop columns" in command or "drop column" in command:
-        columns = command.replace("drop columns", "").replace("drop column", "").strip()
-        return drop_columns, columns
-    elif "show a scatter plot" in command or "scatter plot of" in command:
-        params = command.replace("show a scatter plot of", "").replace("scatter plot of", "").strip()
-        return generate_scatter_plot, params
-    elif "show a histogram" in command or "histogram of" in command:
-        params = command.replace("show a histogram of", "").replace("histogram of", "").strip()
-        return generate_histogram, params
-    elif "analyze plot" in command:
-        return lambda x: analyze_plot(), None
-    return None, command
-# Dataset Preview Function
-def display_dataset_preview():
-    if 'cleaned_data' in st.session_state:
-        st.subheader("Current Dataset Preview")
-        st.dataframe(st.session_state.cleaned_data.head(10), use_container_width=True)
-        st.markdown("---")
-# Main App
 def main():
-    # Header
-    st.markdown("""
-        <div class="header">
-            <h1 class="header-title">Data-Vision Pro</h1>
-            <div class="header-subtitle">Advanced Data Analysis with Groq Inference</div>
-        </div>
-    """, unsafe_allow_html=True)
-    # Sidebar Navigation
-    with st.sidebar:
-        st.markdown("### 🔮 Data-Vision Pro")
-        st.markdown("Your AI-powered data analysis suite with RAG.")
-        st.markdown("---")
-        app_mode = st.selectbox(
-            "Navigation",
-            ["Data Upload", "Data Cleaning", "EDA"],
-            format_func=lambda x: f"📌 {x}"
-        )
-        model = st.selectbox(
-            "Select Groq Model",
-            ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
-            index=0
-        )
-        if app_mode == "Data Upload":
-            st.info("⬆️ Upload your CSV or XLSX dataset to begin.")
-        elif app_mode == "Data Cleaning":
-            st.info("🧹 Clean and preprocess your data.")
-        elif app_mode == "EDA":
-            st.info("🔍 Explore your data visually.")
-        if 'cleaned_data' in st.session_state:
-            csv = st.session_state.cleaned_data.to_csv(index=False)
-            st.download_button(
-                label="Download Cleaned Data",
-                data=csv,
-                file_name='cleaned_data.csv',
-                mime='text/csv',
-            )
-        st.markdown("---")
-        st.markdown("Built with <span class='tech-badge'>Streamlit</span> + <span class='tech-badge'>Groq</span>", unsafe_allow_html=True)
-    # Initialize Session State
-    if 'vector_store' not in st.session_state:
-        st.session_state.vector_store = None
-    if 'chat_history' not in st.session_state:
-        st.session_state.chat_history = []
-    # Display Dataset Preview
-    display_dataset_preview()
-    # App Pages
-    if app_mode == "Data Upload":
-        st.header("📤 Data Upload & Profiling")
-        uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"], key="file_uploader")
         if uploaded_file:
-            st.session_state.pop('raw_data', None)
-            st.session_state.pop('cleaned_data', None)
-            st.session_state.pop('data_versions', None)
-            try:
-                if uploaded_file.name.endswith('.csv'):
-                    df = pd.read_csv(uploaded_file)
-                else:
-                    df = pd.read_excel(uploaded_file)
-                if df.empty:
-                    st.error("Uploaded file is empty.")
-                    st.stop()
-                st.session_state.raw_data = df
-                st.session_state.cleaned_data = df.copy()
-                st.session_state.dataset_text = convert_df_to_text(df)
-                st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
-                if 'data_versions' not in st.session_state:
-                    st.session_state.data_versions = [df.copy()]
-                col1, col2, col3 = st.columns(3)
-                with col1: st.metric("Rows", df.shape[0])
-                with col2: st.metric("Columns", df.shape[1])
-                with col3: st.metric("Missing Values", df.isna().sum().sum())
-                if st.checkbox("Show Data Preview"):
-                    st.dataframe(df.head(10), use_container_width=True)
-                if st.button("Generate Full Profile Report"):
-                    with st.spinner("Generating report..."):
-                        pr = ProfileReport(df, explorative=True)
-                        st_profile_report(pr)
-                st.success("✅ Data loaded successfully!")
-            except Exception as e:
-                st.error(f"An error occurred: {str(e)}")
-    elif app_mode == "Data Cleaning":
-        st.header("🧹 Smart Data Cleaning")
-        if 'raw_data' not in st.session_state:
-            st.warning("Please upload data first in the Data Upload section.")
-            st.stop()
-        if 'cleaned_data' in st.session_state:
-            df = st.session_state.cleaned_data.copy()
         else:
-            st.session_state.cleaned_data = st.session_state.raw_data.copy()
-            df = st.session_state.cleaned_data.copy()
-        enhance_section_title("📊 Data Health Dashboard")
-        with st.expander("Explore Data Health Metrics", expanded=True):
-            col1, col2, col3 = st.columns(3)
-            with col1: st.metric("Columns", len(df.columns))
-            with col2: st.metric("Rows", len(df))
-            with col3: st.metric("Missing Values", df.isna().sum().sum())
-            if st.button("Generate Detailed Health Report"):
-                with st.spinner("Generating report..."):
-                    profile = ProfileReport(df, minimal=True)
-                    st_profile_report(profile)
-            if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
-                if st.button("Undo Last Action"):
-                    st.session_state.data_versions.pop()
-                    st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
-                    st.session_state.dataset_text = convert_df_to_text(st.session_state.cleaned_data)
-                    st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
-                    st.rerun()
-        with st.expander("🛠️ Data Cleaning Operations", expanded=True):
-            enhance_section_title("🔍 Missing Values Treatment")
-            missing_cols = df.columns[df.isna().any()].tolist()
-            if missing_cols:
-                cols = st.multiselect("Select columns with missing values", missing_cols)
-                method = st.selectbox("Choose imputation method", [
-                    "Drop Missing Values", "Fill with Mean/Median", "Fill with Custom Value", "Forward Fill", "Backward Fill"
-                ])
-                if method == "Fill with Custom Value":
-                    custom_val = st.text_input("Enter custom value:")
-                if st.button("Apply Missing Value Treatment"):
-                    new_df = df.copy()
-                    if method == "Drop Missing Values":
-                        new_df = new_df.dropna(subset=cols)
-                    elif method == "Fill with Mean/Median":
-                        for col in cols:
-                            if pd.api.types.is_numeric_dtype(new_df[col]):
-                                new_df[col] = new_df[col].fillna(new_df[col].median())
-                            else:
-                                new_df[col] = new_df[col].fillna(new_df[col].mode()[0])
-                    elif method == "Fill with Custom Value" and custom_val:
-                        new_df[cols] = new_df[cols].fillna(custom_val)
-                    elif method == "Forward Fill":
-                        new_df[cols] = new_df[cols].ffill()
-                    elif method == "Backward Fill":
-                        new_df[cols] = new_df[cols].bfill()
-                    update_cleaned_data(new_df)
             else:
-                st.success("✨ No missing values detected!")
-            enhance_section_title("🔄 Data Type Conversion")
-            col_to_convert = st.selectbox("Select column to convert", df.columns)
-            new_type = st.selectbox("Select new data type", ["String", "Integer", "Float", "Boolean", "Datetime"])
-            if new_type == "Datetime":
-                date_format = st.text_input("Enter date format (e.g., %Y-%m-%d):", "%Y-%m-%d")
-            if st.button("Convert Data Type"):
-                new_df = df.copy()
-                if new_type == "String":
-                    new_df[col_to_convert] = new_df[col_to_convert].astype(str)
-                elif new_type == "Integer":
-                    new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce').astype('Int64')
-                elif new_type == "Float":
-                    new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce')
-                elif new_type == "Boolean":
-                    new_df[col_to_convert] = new_df[col_to_convert].astype(bool)
-                elif new_type == "Datetime":
-                    new_df[col_to_convert] = pd.to_datetime(new_df[col_to_convert], format=date_format, errors='coerce')
-                update_cleaned_data(new_df)
-            enhance_section_title("🗑️ Drop Columns")
-            columns_to_drop = st.multiselect("Select columns to remove", df.columns)
-            if columns_to_drop and st.button("Confirm Column Removal"):
-                new_df = df.copy()
-                new_df = new_df.drop(columns=columns_to_drop)
-                update_cleaned_data(new_df)
-            enhance_section_title("🔢 Encoding Options")
-            encoding_method = st.radio("Choose encoding method", ("Label Encoding", "One-Hot Encoding"))
-            data_to_encode = st.multiselect("Select columns to encode", df.select_dtypes(include='object').columns)
-            if data_to_encode and st.button("Apply Encoding"):
-                new_df = df.copy()
-                if encoding_method == "Label Encoding":
-                    for col in data_to_encode:
-                        le = LabelEncoder()
-                        new_df[col] = le.fit_transform(new_df[col].astype(str))
-                elif encoding_method == "One-Hot Encoding":
-                    new_df = pd.get_dummies(new_df, columns=data_to_encode, drop_first=True, dtype=int)
-                update_cleaned_data(new_df)
-            enhance_section_title("📏 StandardScaler")
-            scale_cols = st.multiselect("Select numerical columns to scale", df.select_dtypes(include=np.number).columns)
-            if scale_cols and st.button("Apply StandardScaler"):
-                new_df = df.copy()
                 scaler = StandardScaler()
-                new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
-                update_cleaned_data(new_df)
-    elif app_mode == "EDA":
-        st.header("🔍 Interactive Data Explorer")
-        if 'cleaned_data' not in st.session_state:
-            st.warning("Please upload and clean data first.")
-            st.stop()
-        df = st.session_state.cleaned_data.copy()
-        enhance_section_title("Dataset Overview")
-        with st.container():
-            col1, col2, col3, col4 = st.columns(4)
-            col1.metric("Total Rows", df.shape[0])
-            col2.metric("Total Columns", df.shape[1])
-            missing_percentage = df.isna().sum().sum() / df.size * 100
-            col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
-            col4.metric("Duplicates", df.duplicated().sum())
-        tab1, tab2, tab3 = st.tabs(["Quick Preview", "Column Types", "Missing Matrix"])
-        with tab1:
-            st.write("First few rows of the dataset:")
-            st.dataframe(df.head(), use_container_width=True)
-        with tab2:
-            st.write("Column Data Types:")
-            type_counts = df.dtypes.value_counts().reset_index()
-            type_counts.columns = ['Type', 'Count']
-            st.dataframe(type_counts, use_container_width=True)
-        with tab3:
-            st.write("Missing Values Matrix:")
-            fig_missing = px.imshow(df.isna(), color_continuous_scale=['#e0e0e0', '#66c2a5'])
-            fig_missing.update_layout(coloraxis_colorscale=[[0, 'lightgrey'], [1, '#FF4B4B']])
-            st.plotly_chart(fig_missing, use_container_width=True)
-        enhance_section_title("Interactive Visualization Builder")
-        with st.container():
-            col1, col2 = st.columns([1, 3])
-            with col1:
-                plot_type = st.selectbox("Choose visualization type", [
-                    "Scatter Plot", "Histogram", "Box Plot", "Line Chart", "Bar Chart", "Correlation Matrix"
-                ])
-                x_axis = st.selectbox("X-axis", df.columns) if plot_type != "Correlation Matrix" else None
-                y_axis = st.selectbox("Y-axis", df.columns) if plot_type in ["Scatter Plot", "Box Plot", "Line Chart"] else None
-                color_by = st.selectbox("Color encoding", ["None"] + df.columns.tolist(), format_func=lambda x: "No color" if x == "None" else x) if plot_type != "Correlation Matrix" else None
-            with col2:
-                try:
-                    fig = None
-                    if plot_type == "Scatter Plot" and x_axis and y_axis:
-                        fig = px.scatter(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Scatter Plot of {x_axis} vs {y_axis}')
-                    elif plot_type == "Histogram" and x_axis:
-                        fig = px.histogram(df, x=x_axis, color=color_by if color_by != "None" else None, nbins=30, title=f'Histogram of {x_axis}')
-                    elif plot_type == "Box Plot" and x_axis and y_axis:
-                        fig = px.box(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Box Plot of {x_axis} vs {y_axis}')
-                    elif plot_type == "Line Chart" and x_axis and y_axis:
-                        fig = px.line(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Line Chart of {x_axis} vs {y_axis}')
-                    elif plot_type == "Bar Chart" and x_axis:
-                        fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None, title=f'Bar Chart of {x_axis}')
-                    elif plot_type == "Correlation Matrix":
-                        numeric_df = df.select_dtypes(include=np.number)
-                        if len(numeric_df.columns) > 1:
-                            corr = numeric_df.corr()
-                            fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1, title='Correlation Matrix')
-                    if fig:
-                        fig.update_layout(template="plotly_white")
-                        st.plotly_chart(fig, use_container_width=True)
-                        st.session_state.last_plot = {
-                            "type": plot_type,
-                            "x": x_axis,
-                            "y": y_axis,
-                            "data": df[[x_axis, y_axis]].to_json() if y_axis else df[[x_axis]].to_json()
-                        }
-                        plot_text = extract_plot_data(st.session_state.last_plot, df)
-                        st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
-                        with st.expander("Extracted Plot Data"):
-                            st.text(plot_text)
-                    else:
-                        st.error("Please provide required inputs for the selected plot type.")
-                except Exception as e:
-                    st.error(f"Couldn't create visualization: {str(e)}")
-    # Chatbot Section
-    st.markdown("---")
-    st.markdown('<div class="chat-container">', unsafe_allow_html=True)
-    st.subheader("💬 AI Chatbot Assistant (RAG Enabled)")
-    st.info("Ask about your data or app features! Try: 'drop columns X, Y', 'scatter plot of X vs Y', 'analyze plot'")
-    for message in st.session_state.chat_history:
-        with st.chat_message(message["role"]):
-            st.markdown(f'<div class="{message["role"]}-message">{message["content"]}</div>', unsafe_allow_html=True)
-    user_input = st.chat_input("Ask me anything...")
-    if user_input:
-        st.session_state.chat_history.append({"role": "user", "content": user_input})
-        with st.chat_message("user"):
-            st.markdown(f'<div class="user-message">{user_input}</div>', unsafe_allow_html=True)
-        with st.spinner("Processing..."):
-            func, param = parse_command(user_input)
-            if func:
-                response = func(param) if param else func(None)
-            else:
-                response = get_chatbot_response(user_input, app_mode, st.session_state.vector_store, model)
-            st.session_state.chat_history.append({"role": "assistant", "content": response})
-        with st.chat_message("assistant"):
-            st.markdown(f'<div class="bot-message">{response}</div>', unsafe_allow_html=True)
-    st.markdown('</div>', unsafe_allow_html=True)
-    # Footer
-    st.markdown("""
-        <div class="footer">
-            <div>Built with <span class="tech-badge">Streamlit</span> + <span class="tech-badge">Groq</span> + <span class="tech-badge">LangChain</span> + <span class="tech-badge">FAISS</span></div>
-            <div style="margin-top: 8px;">Fast inference for data insights</div>
-        </div>
-    """, unsafe_allow_html=True)
 if __name__ == "__main__":
     main()

 import streamlit as st
 import pandas as pd
 import plotly.express as px
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.neural_network import MLPClassifier, MLPRegressor
+from sklearn.cluster import KMeans
+from sklearn.metrics import accuracy_score, r2_score, silhouette_score
+from sklearn.preprocessing import StandardScaler
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
 from groq import Groq
 from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.document_loaders import TextLoader
+import os
 import tempfile
+# Initialize clients
 client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# Set page config
+st.set_page_config(page_title="Neural-Vision Enhanced", layout="wide")
+# Custom CSS for Responsive Silver-Blue-Gold Theme with Top Nav, followed by
+# the page footer. The <style> element is closed before the footer markup so
+# the footer is rendered as HTML instead of being swallowed as CSS text.
 st.markdown("""
     <style>
     :root {
     .stApp {
         background-color: var(--silver);
         font-family: 'Inter', sans-serif;
+        max-width: 1200px;
         margin: 0 auto;
         padding: 10px;
     }
         color: white;
         padding: 15px;
         border-radius: 5px;
         text-align: center;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     }
     .header-title {
+        font-size: 1.8rem;
         font-weight: 700;
         margin: 0;
     }
     .header-subtitle {
+        font-size: 1rem;
         margin-top: 5px;
     }
+    .nav-bar {
         background-color: white;
         border-radius: 5px;
         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
         padding: 15px;
+        margin-bottom: 20px;
+        display: flex;
+        justify-content: space-around;
+        align-items: center;
     }
+    .nav-item {
+        color: var(--blue);
+        font-weight: 500;
+        cursor: pointer;
+        padding: 5px 10px;
+        border-radius: 5px;
+    }
+    .nav-item:hover {
+        background-color: var(--gold);
+        color: white;
+    }
+    .card {
         background-color: white;
         border-radius: 5px;
         box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        padding: 20px;
+        margin-bottom: 20px;
+    }
+    .chat-container {
+        background-color: white;
+        border-radius: 5px;
         padding: 15px;
         margin-top: 20px;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     }
     .user-message {
         background-color: var(--blue);
         color: white;
+        border-radius: 15px 15px 5px 15px;
+        padding: 10px;
         max-width: 80%;
+        margin-left: auto;
         margin-bottom: 10px;
     }
     .bot-message {
         background-color: #F0F0F0;
         color: var(--text-color);
+        border-radius: 15px 15px 15px 5px;
+        padding: 10px;
         max-width: 80%;
+        margin-right: auto;
         margin-bottom: 10px;
     }
     .stButton > button {
         background-color: var(--gold);
         color: white;
     }
     @media (max-width: 768px) {
         .header-title {
+            font-size: 1.4rem;
         }
         .header-subtitle {
+            font-size: 0.9rem;
+        }
+        .nav-bar {
+            flex-direction: column;
+            padding: 10px;
         }
+        .nav-item {
+            margin: 5px 0;
+            width: 100%;
+            text-align: center;
+        }
+        .card, .chat-container {
             padding: 10px;
         }
         .stApp {
             padding: 5px;
         }
     }
+    </style>
+    <footer style='text-align: center; padding: 10px; background-color: var(--blue); color: white; border-radius: 5px; margin-top: 20px;'>
+        <p>Created by Calvin Allen-Crawford</p>
+    </footer>
 """, unsafe_allow_html=True)
+# Session State Initialization
+# Each key is seeded only when absent, so values already held by the session
+# are left untouched.
+_SESSION_DEFAULTS = {
+    'metrics': {},
+    'chat_history': [],
+    'vector_store': None,
+    'custom_layers': [],
+    'prebuilt_selection': None,
+    'model_config': {},
+    'model_builder_mode': "prebuilt",
+    'custom_model_type': "classification",
+}
+for _state_key, _state_default in _SESSION_DEFAULTS.items():
+    if _state_key not in st.session_state:
+        st.session_state[_state_key] = _state_default
+# Prebuilt Models
+# Maps a display name to its description, its architecture config (the dict
+# stored as the active model config when the model is picked) and its domain.
+PREBUILT_MODELS = {
+    "Legal Document Classifier": {
+        "description": "Optimized for legal document classification.",
+        "architecture": {
+            "type": "classification",
+            "hidden_layers": [(128, "relu"), (64, "relu")],
+            "dropout": 0.3,
+            "optimizer": "adam",
+            "learning_rate": 0.001,
+        },
+        "domain": "Legal",
+    },
+    "Financial Fraud Detector": {
+        "description": "Detects anomalies in financial transactions.",
+        "architecture": {
+            "type": "classification",
+            "hidden_layers": [(256, "relu"), (128, "relu"), (64, "relu")],
+            "dropout": 0.4,
+            "optimizer": "adam",
+            "learning_rate": 0.0005,
+        },
+        "domain": "Financial",
+    },
+    "Customer Segmentation Engine": {
+        "description": "Advanced customer segmentation.",
+        "architecture": {
+            "type": "clustering",
+            "n_clusters": 5,
+            "algorithm": "kmeans",
+            "init": "k-means++",
+            "n_init": 10,
+        },
+        "domain": "Marketing",
+    },
+}
+# Helper Functions (unchanged)
 def convert_df_to_text(df):
+    """Summarize a DataFrame as plain text (the text the app feeds to the vector store).
+
+    Args:
+        df: pandas DataFrame to describe.
+
+    Returns:
+        A newline-terminated string: a row/column count line, a total
+        missing-value count line, then one "- name (dtype): Mean=..." line
+        per column. The mean is formatted to two decimals for numeric
+        columns and reported as "N/A" for every other column.
+    """
+    lines = [
+        f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns",
+        f"Missing Values: {df.isna().sum().sum()}",
+    ]
     for col in df.columns:
+        # Branch before formatting: a conditional cannot live inside a format
+        # spec, and .mean() must not be called on non-numeric columns.
+        if pd.api.types.is_numeric_dtype(df[col]):
+            mean_text = f"{df[col].mean():.2f}"
+        else:
+            mean_text = "N/A"
+        lines.append(f"- {col} ({df[col].dtype}): Mean={mean_text}")
+    return "\n".join(lines) + "\n"
 def create_vector_store(df_text):
         temp_path = temp_file.name
     loader = TextLoader(temp_path)
     documents = loader.load()
+    texts = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100).split_documents(documents)
     vector_store = FAISS.from_documents(texts, embeddings)
     os.unlink(temp_path)
     return vector_store
+def get_groq_response(prompt, mode):
+    """Ask the Groq chat model a question, grounded in the uploaded dataset if any.
+
+    Args:
+        prompt: The user's question; also used as the similarity-search query.
+        mode: Domain name interpolated into the system message.
+
+    Returns:
+        The reply text of the first completion choice, or "Error: <message>"
+        when the completion request raises. The similarity search runs
+        outside the try block, so its exceptions propagate to the caller.
+    """
+    store = st.session_state.vector_store
+    context = ""
+    if store:
+        # Prepend the three most similar dataset chunks as bullet points.
+        hits = store.similarity_search(prompt, k=3)
+        bullets = [f"- {doc.page_content}" for doc in hits]
+        context += "\nDataset Context:\n" + "\n".join(bullets)
+    messages = [
+        {"role": "system", "content": f"You are an expert in {mode} data analysis.\n{context}"},
+        {"role": "user", "content": prompt},
+    ]
+    try:
+        completion = client.chat.completions.create(model="llama3-70b-8192", messages=messages)
+        return completion.choices[0].message.content
+    except Exception as exc:
+        return "Error: " + str(exc)
+def build_model_from_config(config, X, y=None):
+    """Create an unfitted scikit-learn estimator from a model-config dict.
+
+    Args:
+        config: Dict read with ``.get``: "type" (default "classification"),
+            and either the clustering keys "n_clusters"/"init"/"n_init" or
+            the network keys "hidden_layers"/"optimizer"/"learning_rate".
+            Other keys (e.g. "dropout") are not read here.
+        X: Accepted for interface compatibility; not used.
+        y: Accepted for interface compatibility; not used.
+
+    Returns:
+        KMeans for "clustering", MLPClassifier for "classification",
+        MLPRegressor for any other type.
+    """
+    kind = config.get("type", "classification")
+    if kind == "clustering":
+        return KMeans(
+            n_clusters=config.get("n_clusters", 3),
+            init=config.get("init", "k-means++"),
+            n_init=config.get("n_init", 10),
+            random_state=42,
+        )
+    layers = config.get("hidden_layers", [(100, "relu")])
+    mlp_kwargs = {
+        "hidden_layer_sizes": [units for units, _ in layers],
+        # Only the first layer's activation is used for the whole network.
+        "activation": layers[0][1] if layers else "relu",
+        "solver": config.get("optimizer", "adam"),
+        "learning_rate_init": config.get("learning_rate", 0.001),
+        "random_state": 42,
+    }
+    estimator_cls = MLPClassifier if kind == "classification" else MLPRegressor
+    return estimator_cls(**mlp_kwargs)
+# Main Application
 def main():
+    """Render the Neural-Vision Enhanced page and dispatch to the selected view.
+
+    Draws the header and a three-column top bar (CSV upload, view selector,
+    info text), then renders one of three views: "Model Builder", "Chat" or
+    "Train Model". State is kept in ``st.session_state`` (``vector_store``,
+    ``model_config``, ``chat_history``, ``metrics`` and the builder fields).
+    """
+    import html  # local import: used only to escape chat text before rendering
+    st.markdown('<div class="header"><h1 class="header-title">Neural-Vision Enhanced</h1><p class="header-subtitle">Build & Train Neural Networks</p></div>', unsafe_allow_html=True)
+    # Top Navigation Bar
+    st.markdown('<div class="nav-bar">', unsafe_allow_html=True)
+    col1, col2, col3 = st.columns([1, 2, 1])
+    # The upload is parsed once here and reused by the "Train Model" view:
+    # reading the same upload buffer again would start at end-of-file.
+    df = None
+    with col1:
+        st.markdown('<div class="nav-item">Data Input</div>', unsafe_allow_html=True)
+        uploaded_file = st.file_uploader("Upload CSV Dataset", type=["csv"])
         if uploaded_file:
+            df = pd.read_csv(uploaded_file)
+            st.session_state.vector_store = create_vector_store(convert_df_to_text(df))
+            st.success("Dataset uploaded!")
+    with col2:
+        st.markdown('<div class="nav-item">Navigation</div>', unsafe_allow_html=True)
+        nav_option = st.selectbox("Navigate", ["Model Builder", "Chat", "Train Model"], label_visibility="collapsed")
+    with col3:
+        st.markdown('<div class="nav-item">Info</div>', unsafe_allow_html=True)
+        st.write("Built with Streamlit & Groq")
+    st.markdown('</div>', unsafe_allow_html=True)
+    # Main Content
+    if nav_option == "Model Builder":
+        st.markdown('<div class="card"><h2>Model Builder</h2></div>', unsafe_allow_html=True)
+        # The selection is not read in this view; the widget is kept for the UI.
+        mode = st.selectbox("Domain", ["Legal", "Financial", "Marketing"])
+        model_builder_mode = st.radio("Mode", ["Prebuilt", "Custom"])
+        st.session_state.model_builder_mode = "prebuilt" if model_builder_mode == "Prebuilt" else "custom"
+        if st.session_state.model_builder_mode == "prebuilt":
+            for name, details in PREBUILT_MODELS.items():
+                if st.button(f"{name}: {details['description']}", key=name):
+                    st.session_state.prebuilt_selection = name
+                    st.session_state.model_config = details["architecture"]
+            if st.session_state.prebuilt_selection:
+                st.json(st.session_state.model_config)
         else:
+            st.session_state.custom_model_type = st.selectbox("Type", ["classification", "regression", "clustering"])
+            if st.session_state.custom_model_type != "clustering":
+                layer_count = st.number_input("Layers", min_value=1, value=1)
+                # Rebuilt from the widgets on every run so it mirrors the form.
+                st.session_state.custom_layers = []
+                for i in range(int(layer_count)):
+                    size = st.number_input(f"Layer {i+1} Size", min_value=1, value=100, key=f"size_{i}")
+                    activation = st.selectbox(f"Layer {i+1} Activation", ["relu", "tanh"], key=f"act_{i}")
+                    st.session_state.custom_layers.append((size, activation))
+                optimizer = st.selectbox("Optimizer", ["adam", "sgd"])
+                st.session_state.model_config = {"type": st.session_state.custom_model_type, "hidden_layers": st.session_state.custom_layers, "optimizer": optimizer, "learning_rate": 0.001}
             else:
+                st.session_state.model_config = {"type": "clustering", "n_clusters": st.number_input("Clusters", min_value=2, value=3)}
+            if st.button("Finalize"):
+                st.json(st.session_state.model_config)
+    elif nav_option == "Chat":
+        st.markdown('<div class="chat-container"><h3>Chat with Grok</h3></div>', unsafe_allow_html=True)
+        mode = st.selectbox("Domain", ["Legal", "Financial", "Marketing"])
+        prompt = st.text_input("Ask a question:")
+        if prompt:
+            response = get_groq_response(prompt, mode)
+            st.session_state.chat_history.append({"role": "user", "content": prompt})
+            st.session_state.chat_history.append({"role": "bot", "content": response})
+        for msg in st.session_state.chat_history:
+            css_class = "user-message" if msg["role"] == "user" else "bot-message"
+            # Escape the text: it comes from the user or the model and is
+            # injected into raw HTML (unsafe_allow_html=True).
+            safe_text = html.escape(str(msg["content"]))
+            st.markdown(f'<div class="{css_class}">{safe_text}</div>', unsafe_allow_html=True)
+    elif nav_option == "Train Model":
+        config = st.session_state.model_config
+        if df is not None and config:
+            st.markdown('<div class="card"><h2>Train Model</h2></div>', unsafe_allow_html=True)
+            is_clustering = config["type"] == "clustering"
+            # Supervised models use the last column as the target; clustering
+            # uses every column as a feature.
+            X = df if is_clustering else df.drop(columns=[df.columns[-1]])
+            y = None if is_clustering else df[df.columns[-1]]
+            if st.button("Train"):
                 scaler = StandardScaler()
+                X_scaled = scaler.fit_transform(X)
+                model = build_model_from_config(config, X_scaled, y)
+                if not is_clustering:
+                    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
+                    model.fit(X_train, y_train)
+                    y_pred = model.predict(X_test)
+                    if config["type"] == "classification":
+                        st.session_state.metrics = {"accuracy": accuracy_score(y_test, y_pred)}
+                    else:
+                        st.session_state.metrics = {"r2_score": r2_score(y_test, y_pred)}
+                else:
+                    model.fit(X_scaled)
+                    st.session_state.metrics = {"silhouette_score": silhouette_score(X_scaled, model.labels_)}
+                st.json(st.session_state.metrics)
+        else:
+            st.warning("Upload a dataset and configure a model first!")
+# Run the app only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
     main()