Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Files Community

CosmickVisions committed on Mar 19

Commit

ed852a2

verified ·

1 Parent(s): 42bab1b

Update app.py

Browse files

Files changed (1) hide show

app.py +257 -238

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
-import gradio as gr
 import pandas as pd
 import numpy as np
 import plotly.express as px
 from ydata_profiling import ProfileReport
 import os
 from dotenv import load_dotenv
 from groq import Groq
@@ -16,6 +17,9 @@ from sklearn.preprocessing import StandardScaler, LabelEncoder
 import tempfile
 import json
 # Load environment variables
 load_dotenv()
@@ -25,128 +29,6 @@ client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 # Initialize HuggingFace embeddings
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
-# Helper Functions (unchanged from your original)
-def update_cleaned_data(df):
-    gr.State(value=df)
-    if 'data_versions' not in gr.State():
-        gr.State(value=[gr.State(value=df.copy())])
-    gr.State(value=gr.State(value=gr.State(value=df.copy())))
-    return df, "✅ Action completed successfully!"
-def convert_df_to_text(df):
-    text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
-    text += f"Missing Values: {df.isna().sum().sum()}\n"
-    text += "Columns:\n"
-    for col in df.columns:
-        text += f"- {col} ({df[col].dtype}): "
-        if pd.api.types.is_numeric_dtype(df[col]):
-            text += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
-        else:
-            text += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
-        text += f", Missing={df[col].isna().sum()}\n"
-    return text
-def create_vector_store(df_text):
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
-        temp_file.write(df_text)
-        temp_path = temp_file.name
-    loader = TextLoader(temp_path)
-    documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-    texts = text_splitter.split_documents(documents)
-    vector_store = FAISS.from_documents(texts, embeddings)
-    os.unlink(temp_path)
-    return vector_store
-def update_vector_store_with_plot(plot_text, existing_vector_store):
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
-        temp_file.write(plot_text)
-        temp_path = temp_file.name
-    loader = TextLoader(temp_path)
-    documents = loader.load()
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
-    texts = text_splitter.split_documents(documents)
-    if existing_vector_store:
-        existing_vector_store.add_documents(texts)
-    else:
-        existing_vector_store = FAISS.from_documents(texts, embeddings)
-    os.unlink(temp_path)
-    return existing_vector_store
-def extract_plot_data(plot_info, df):
-    plot_type = plot_info["type"]
-    x_col = plot_info["x"]
-    y_col = plot_info["y"] if "y" in plot_info else None
-    data = pd.read_json(plot_info["data"])
-    plot_text = f"Plot Type: {plot_type}\nX-Axis: {x_col}\n"
-    if y_col:
-        plot_text += f"Y-Axis: {y_col}\n"
-    if plot_type == "Scatter Plot" and y_col:
-        correlation = data[x_col].corr(data[y_col])
-        slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col].dropna(), data[y_col].dropna())
-        plot_text += f"Correlation: {correlation:.2f}\nLinear Regression: Slope={slope:.2f}, Intercept={intercept:.2f}, R²={r_value**2:.2f}, p-value={p_value:.4f}\n"
-    return plot_text
-def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
-    system_prompt = (
-        f"You are an AI assistant in Data-Vision Pro, on the '{app_mode}' page:\n"
-        "- Data Upload: Upload CSV/XLSX files, view stats, or generate reports.\n"
-        "- Data Cleaning: Clean data (e.g., handle missing values, encode variables).\n"
-        "- EDA: Visualize data (e.g., scatter plots, histograms) and analyze plots.\n"
-        "Use context if provided."
-    )
-    context = ""
-    if vector_store:
-        docs = vector_store.similarity_search(user_input, k=3)
-        if docs:
-            context = "\n\nContext:\n" + "\n".join([f"- {doc.page_content}" for doc in docs])
-    try:
-        response = client.chat.completions.create(
-            model=model,
-            messages=[
-                {"role": "system", "content": system_prompt + context},
-                {"role": "user", "content": user_input}
-            ],
-            temperature=0.7,
-            max_tokens=1024
-        )
-        return response.choices[0].message.content
-    except Exception as e:
-        return f"Error: {str(e)}"
-def parse_command(command, df, vector_store):
-    command = command.lower().strip()
-    if "drop columns" in command:
-        columns = command.replace("drop columns", "").strip().split(',')
-        valid_cols = [col.strip() for col in columns if col.strip() in df.columns]
-        if valid_cols:
-            df = df.drop(columns=valid_cols)
-            return update_cleaned_data(df)[0], f"Dropped columns: {', '.join(valid_cols)}"
-        return df, "No valid columns to drop."
-    elif "scatter plot of" in command:
-        match = re.search(r"([\w\s]+)\s+vs\s+([\w\s]+)", command)
-        if match:
-            x, y = match.group(1).strip(), match.group(2).strip()
-            if x in df.columns and y in df.columns:
-                fig = px.scatter(df, x=x, y=y)
-                plot_info = {"type": "Scatter Plot", "x": x, "y": y, "data": df[[x, y]].to_json()}
-                return df, fig, plot_info
-        return df, None, "Invalid scatter plot command."
-    elif "histogram of" in command:
-        col = command.replace("histogram of", "").strip()
-        if col in df.columns:
-            fig = px.histogram(df, x=col)
-            plot_info = {"type": "Histogram", "x": col, "data": df[[col]].to_json()}
-            return df, fig, plot_info
-        return df, None, "Invalid histogram command."
-    elif "analyze plot" in command and "last_plot" in gr.State():
-        plot_info = gr.State(value="last_plot")
-        plot_text = extract_plot_data(plot_info, df)
-        if vector_store:
-            vector_store = update_vector_store_with_plot(plot_text, vector_store)
-        return df, plot_text
-    return df, None, None
 # Custom HTML/JS for Enhanced UI
 custom_html = """
 <style>
@@ -159,6 +41,11 @@ custom_html = """
     --gold: #A87E01;
     --shadow-color: rgba(0,0,0,0.1);
   }
   .header {
     background: linear-gradient(90deg, var(--blue) 80%, var(--blue-dark) 100%);
     color: white;
@@ -224,7 +111,7 @@ custom_html = """
 </style>
 <div class="header">
   <h1>Data-Vision Pro</h1>
-  <div>Advanced Data Analysis with Groq</div>
 </div>
 <div class="nav-tabs">
   <div class="nav-tab active" data-tab="upload">Data Upload</div>
@@ -233,18 +120,18 @@ custom_html = """
 </div>
 <div id="upload" class="tab-content active">
   <h2>📤 Data Upload & Profiling</h2>
-  <!-- Gradio components will be injected here -->
 </div>
 <div id="cleaning" class="tab-content">
   <h2>🧹 Data Cleaning</h2>
-  <!-- Gradio components will be injected here -->
 </div>
 <div id="eda" class="tab-content">
   <h2>🔍 Interactive Data Explorer</h2>
-  <!-- Gradio components will be injected here -->
 </div>
 <div class="chat-container">
-  <h2>💬 AI Chatbot Assistant</h2>
   <div id="chat" style="max-height:300px; overflow-y:auto;"></div>
   <input id="chat-input" placeholder="Ask me anything..." style="width:80%;">
   <button onclick="sendChat()">Send</button>
@@ -271,12 +158,12 @@ custom_html = """
     chat.innerHTML += `<div class="message user-message">${message}</div>`;
     chat.scrollTop = chat.scrollHeight;
-    // Trigger Gradio event
-    const event = new CustomEvent('chat_submit', { detail: message });
-    document.dispatchEvent(event);
   }
-  // Listen for bot responses from Gradio
   document.addEventListener('bot_response', (e) => {
     const chat = document.getElementById('chat');
     chat.innerHTML += `<div class="message bot-message">${e.detail}</div>`;
@@ -285,120 +172,252 @@ custom_html = """
 </script>
 """
-# Gradio Interface
-def main_interface(file, chat_input, cleaned_data, vector_store, last_plot, app_mode, model):
-    outputs = {}
-    # Data Upload
-    if file and app_mode == "Data Upload":
-        if file.name.endswith('.csv'):
-            df = pd.read_csv(file)
         else:
-            df = pd.read_excel(file)
-        cleaned_data, msg = update_cleaned_data(df)
-        vector_store = create_vector_store(convert_df_to_text(df))
-        metrics_html = f"""
-        <div class="metrics">
-          <div class="metric">Rows: {df.shape[0]}</div>
-          <div class="metric">Columns: {df.shape[1]}</div>
-          <div class="metric">Missing: {df.isna().sum().sum()}</div>
-        </div>
-        """
-        outputs["upload_output"] = gr.HTML(value=metrics_html + f"<pre>{df.head().to_string()}</pre>")
-        outputs["status"] = msg
-        outputs["cleaned_data"] = cleaned_data
-        outputs["vector_store"] = vector_store
-    # Data Cleaning
-    elif app_mode == "Data Cleaning" and cleaned_data is not None:
-        df = cleaned_data
-        metrics_html = f"""
-        <div class="metrics">
-          <div class="metric">Rows: {df.shape[0]}</div>
-          <div class="metric">Columns: {df.shape[1]}</div>
-          <div class="metric">Missing: {df.isna().sum().sum()}</div>
-        </div>
-        """
-        outputs["cleaning_output"] = gr.HTML(value=metrics_html)
-    # EDA
-    elif app_mode == "EDA" and cleaned_data is not None:
-        df = cleaned_data
-        metrics_html = f"""
-        <div class="metrics">
-          <div class="metric">Rows: {df.shape[0]}</div>
-          <div class="metric">Columns: {df.shape[1]}</div>
-          <div class="metric">Missing: {df.isna().sum().sum()}</div>
-        </div>
-        """
-        outputs["eda_output"] = gr.HTML(value=metrics_html)
-    # Chatbot
     if chat_input:
-        df = cleaned_data if cleaned_data is not None else pd.DataFrame()
-        new_df, plot_fig, plot_info_or_msg = parse_command(chat_input, df, vector_store)
-        if plot_fig:
-            outputs["plot"] = plot_fig
-            outputs["last_plot"] = plot_info_or_msg
-            vector_store = update_vector_store_with_plot(extract_plot_data(plot_info_or_msg, df), vector_store)
-            outputs["vector_store"] = vector_store
-            response = f"Generated {plot_info_or_msg['type'].lower()}."
-        elif isinstance(plot_info_or_msg, str):
-            response = plot_info_or_msg
             if "Dropped columns" in response:
-                outputs["cleaned_data"] = new_df
-                outputs["vector_store"] = create_vector_store(convert_df_to_text(new_df))
         else:
-            response = get_chatbot_response(chat_input, app_mode, vector_store, model)
-        outputs["status"] = response
-        # Trigger JS event for chatbot
-        outputs["chat_output"] = gr.HTML(value=f"""
         <script>
           document.dispatchEvent(new CustomEvent('bot_response', {{ detail: {json.dumps(response)} }}));
         </script>
-        """)
-    return outputs
-# Gradio App
-with gr.Blocks(title="Data-Vision Pro") as demo:
-    # State Variables
-    cleaned_data = gr.State()
-    vector_store = gr.State()
-    last_plot = gr.State()
-    # Custom HTML
-    gr.HTML(custom_html)
-    # Hidden App Mode Input
-    app_mode = gr.Textbox(value="Data Upload", elem_id="app-mode", visible=False)
-    # Inputs
-    with gr.Row():
-        file_input = gr.File(label="Upload CSV/XLSX")
-        model = gr.Dropdown(choices=["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"], value="llama3-70b-8192", label="Groq Model")
-    # Outputs
-    upload_output = gr.HTML(label="Upload Results", elem_id="upload-output")
-    cleaning_output = gr.HTML(label="Cleaning Results", elem_id="cleaning-output")
-    eda_output = gr.HTML(label="EDA Results", elem_id="eda-output")
-    plot = gr.Plot(label="Visualization")
-    status = gr.Textbox(label="Status")
-    chat_output = gr.HTML(visible=False)  # Hidden output to trigger JS
-    # Chat Input
-    chat_input = gr.Textbox(label="Chat with AI", interactive=True, placeholder="Ask me anything...")
-    # Event Handling
-    file_input.change(
-        main_interface,
-        inputs=[file_input, chat_input, cleaned_data, vector_store, last_plot, app_mode, model],
-        outputs=[upload_output, cleaning_output, eda_output, plot, status, chat_output, cleaned_data, vector_store, last_plot]
-    )
-    chat_input.submit(
-        main_interface,
-        inputs=[file_input, chat_input, cleaned_data, vector_store, last_plot, app_mode, model],
-        outputs=[upload_output, cleaning_output, eda_output, plot, status, chat_output, cleaned_data, vector_store, last_plot]
-    )
-demo.launch()

+import streamlit as st
 import pandas as pd
 import numpy as np
 import plotly.express as px
 from ydata_profiling import ProfileReport
+from streamlit_pandas_profiling import st_profile_report
 import os
 from dotenv import load_dotenv
 from groq import Groq
 import tempfile
 import json
+# Set page config
+st.set_page_config(page_title="Data-Vision Pro", layout="wide")
 # Load environment variables
 load_dotenv()
 # Initialize HuggingFace embeddings
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 # Custom HTML/JS for Enhanced UI
 custom_html = """
 <style>
     --gold: #A87E01;
     --shadow-color: rgba(0,0,0,0.1);
   }
+  .stApp {
+    background: linear-gradient(135deg, var(--silver-light) 0%, var(--silver-dark) 100%);
+    font-family: 'Inter', sans-serif;
+    transition: all 0.3s ease;
+  }
   .header {
     background: linear-gradient(90deg, var(--blue) 80%, var(--blue-dark) 100%);
     color: white;
 </style>
 <div class="header">
   <h1>Data-Vision Pro</h1>
+  <div>Advanced Data Analysis with Groq Inference</div>
 </div>
 <div class="nav-tabs">
   <div class="nav-tab active" data-tab="upload">Data Upload</div>
 </div>
 <div id="upload" class="tab-content active">
   <h2>📤 Data Upload & Profiling</h2>
+  <div id="upload-output"></div>
 </div>
 <div id="cleaning" class="tab-content">
   <h2>🧹 Data Cleaning</h2>
+  <div id="cleaning-output"></div>
 </div>
 <div id="eda" class="tab-content">
   <h2>🔍 Interactive Data Explorer</h2>
+  <div id="eda-output"></div>
 </div>
 <div class="chat-container">
+  <h2>💬 AI Chatbot Assistant (RAG Enabled)</h2>
   <div id="chat" style="max-height:300px; overflow-y:auto;"></div>
   <input id="chat-input" placeholder="Ask me anything..." style="width:80%;">
   <button onclick="sendChat()">Send</button>
     chat.innerHTML += `<div class="message user-message">${message}</div>`;
     chat.scrollTop = chat.scrollHeight;
+    // Trigger Streamlit event via hidden input
+    document.getElementById('chat-trigger').value = message;
+    document.getElementById('chat-trigger').dispatchEvent(new Event('change'));
   }
+  // Listen for bot responses from Streamlit
   document.addEventListener('bot_response', (e) => {
     const chat = document.getElementById('chat');
     chat.innerHTML += `<div class="message bot-message">${e.detail}</div>`;
 </script>
 """
+# Helper Functions (mostly unchanged)
def update_cleaned_data(df):
    """Store ``df`` as the current cleaned dataset and record it as a new version.

    Writes three entries in ``st.session_state``:

    * ``cleaned_data``  - set to ``df`` itself (not a copy).
    * ``data_versions`` - a copy of ``df`` is appended; when the list does not
      exist yet it is first seeded with a copy of ``st.session_state.raw_data``.
    * ``dataset_text``  - the text summary produced by ``convert_df_to_text``.

    Returns:
        A fixed success message string.
    """
    state = st.session_state
    state.cleaned_data = df

    if 'data_versions' not in state:
        # First recorded edit: the history starts from the raw upload.
        state.data_versions = [state.raw_data.copy()]
    state.data_versions.append(df.copy())

    state.dataset_text = convert_df_to_text(df)
    return "✅ Action completed successfully!"
+def convert_df_to_text(df):
+    text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
+    text += f"Missing Values: {df.isna().sum().sum()}\n"
+    text += "Columns:\n"
+    for col in df.columns:
+        text += f"- {col} ({df[col].dtype}): "
+        if pd.api.types.is_numeric_dtype(df[col]):
+            text += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
         else:
+            text += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
+        text += f", Missing={df[col].isna().sum()}\n"
+    return text
def create_vector_store(df_text):
    """Build a FAISS vector store from a block of text.

    The text is written to a temporary ``.txt`` file, loaded with
    ``TextLoader``, split into 500-character chunks with a 100-character
    overlap, and embedded with the module-level ``embeddings`` model.

    Args:
        df_text: The text to index.

    Returns:
        The FAISS vector store built from the chunks.
    """
    temp_path = None
    try:
        # Write and read with an explicit encoding so non-ASCII text does not
        # depend on the process locale.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(df_text)
        documents = TextLoader(temp_path, encoding='utf-8').load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)
        return FAISS.from_documents(texts, embeddings)
    finally:
        # Remove the temp file even when loading or embedding raises.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
def update_vector_store_with_plot(plot_text, existing_vector_store):
    """Add a plot description to a vector store, creating the store if needed.

    The text is written to a temporary ``.txt`` file, loaded with
    ``TextLoader`` and split into 500-character chunks with a 100-character
    overlap. The chunks are added to ``existing_vector_store`` when it is
    truthy; otherwise a new FAISS store is built from them with the
    module-level ``embeddings`` model.

    Args:
        plot_text: The plot description to index.
        existing_vector_store: The store to extend, or a falsy value.

    Returns:
        The extended store, or the newly created one.
    """
    temp_path = None
    try:
        # Explicit UTF-8: the plot text can contain non-ASCII characters
        # such as the superscript in "R²".
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', delete=False, encoding='utf-8'
        ) as temp_file:
            temp_path = temp_file.name
            temp_file.write(plot_text)
        documents = TextLoader(temp_path, encoding='utf-8').load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)
        if existing_vector_store:
            existing_vector_store.add_documents(texts)
        else:
            existing_vector_store = FAISS.from_documents(texts, embeddings)
        return existing_vector_store
    finally:
        # Remove the temp file even when loading or embedding raises.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
def extract_plot_data(plot_info, df):
    """Turn a stored plot description into a short text summary.

    Args:
        plot_info: Dict with the keys ``"type"``, ``"x"``, ``"data"`` (the
            plotted columns serialised with ``DataFrame.to_json()``) and,
            for two-axis plots, ``"y"``.
        df: Unused; kept so existing callers keep working.

    Returns:
        Text naming the plot type and axes. For a scatter plot with a y
        column it also contains the correlation and, when at least two
        distinct x values have a paired y value, a linear-regression line.
    """
    # Local import: only this function needs it.
    from io import StringIO

    plot_type = plot_info["type"]
    x_col = plot_info["x"]
    y_col = plot_info.get("y")
    # Wrap the JSON text in a buffer; recent pandas versions deprecate
    # passing a literal JSON string to read_json.
    data = pd.read_json(StringIO(plot_info["data"]))

    plot_text = f"Plot Type: {plot_type}\nX-Axis: {x_col}\n"
    if y_col:
        plot_text += f"Y-Axis: {y_col}\n"

    if plot_type == "Scatter Plot" and y_col:
        correlation = data[x_col].corr(data[y_col])
        plot_text += f"Correlation: {correlation:.2f}\n"

        # Drop rows where either value is missing so x and y stay paired.
        # Dropping NaNs per column would misalign the points or produce
        # arrays of different lengths.
        pairs = data[[x_col, y_col]].dropna()
        xs, ys = pairs.iloc[:, 0], pairs.iloc[:, 1]
        if xs.nunique() < 2:
            # linregress cannot fit a line through fewer than two distinct x values.
            plot_text += "Linear Regression: not enough distinct paired points\n"
        else:
            slope, intercept, r_value, p_value, _ = stats.linregress(xs, ys)
            plot_text += (
                f"Linear Regression: Slope={slope:.2f}, Intercept={intercept:.2f}, "
                f"R²={r_value**2:.2f}, p-value={p_value:.4f}\n"
            )
    return plot_text
def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
    """Ask the Groq chat model a question, optionally grounded in retrieved context.

    Args:
        user_input: The user's message.
        app_mode: Name of the page the user is on; inserted into the system prompt.
        vector_store: Optional store queried with ``similarity_search`` for the
            three most similar chunks, which are appended to the system prompt.
        model: Groq model identifier passed to the completion call.

    Returns:
        The model's reply, or ``"Error: <message>"`` when retrieval or the
        completion call raises.
    """
    system_prompt = (
        f"You are an AI assistant in Data-Vision Pro, on the '{app_mode}' page:\n"
        "- Data Upload: Upload CSV/XLSX files, view stats, or generate reports.\n"
        "- Data Cleaning: Clean data (e.g., handle missing values, encode variables).\n"
        "- EDA: Visualize data (e.g., scatter plots, histograms) and analyze plots.\n"
        "Use context if provided."
    )
    try:
        # Retrieval sits inside the try block so a vector-store failure is
        # reported the same way as a failed completion call.
        context = ""
        if vector_store:
            docs = vector_store.similarity_search(user_input, k=3)
            if docs:
                context = "\n\nContext:\n" + "\n".join(f"- {doc.page_content}" for doc in docs)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system_prompt + context},
                {"role": "user", "content": user_input}
            ],
            temperature=0.7,
            max_tokens=1024
        )
        return response.choices[0].message.content
    except Exception as e:
        # Deliberate boundary catch: the chat UI shows the message to the user.
        return f"Error: {str(e)}"
def parse_command(command, df):
    """Interpret a chat message as a data command and run it against ``df``.

    Recognised commands (matched case-insensitively, in this order):

    * ``drop columns a, b``      - drop the named columns and store the result
      through ``update_cleaned_data``.
    * ``scatter plot of x vs y`` - build a Plotly scatter figure.
    * ``histogram of x``         - build a Plotly histogram.
    * ``analyze plot``           - summarise ``st.session_state.last_plot`` and
      add the summary to the session's vector store.

    Column names are matched case-insensitively because the command itself is
    lower-cased before parsing. If two columns differ only by case, the later
    one wins.

    Args:
        command: The raw chat message.
        df: The DataFrame the command operates on.

    Returns:
        Always a 3-tuple ``(df, result, plot_info)``:

        * ``result`` is a Plotly figure for plot commands, a status or summary
          string for the other commands (including invalid plot commands),
          or ``None`` when the message is not a command.
        * ``plot_info`` is the plot description dict for plot commands and
          ``None`` otherwise.
    """
    command = command.lower().strip()
    # Map lower-cased names back to the real column labels.
    columns_by_name = {str(col).lower(): col for col in df.columns}

    if "drop columns" in command:
        requested = command.replace("drop columns", "").split(',')
        # dict.fromkeys removes duplicates while keeping the requested order.
        valid_cols = list(dict.fromkeys(
            columns_by_name[name.strip()]
            for name in requested
            if name.strip() in columns_by_name
        ))
        if valid_cols:
            df = df.drop(columns=valid_cols)
            update_cleaned_data(df)
            return df, f"Dropped columns: {', '.join(map(str, valid_cols))}", None
        return df, "No valid columns to drop.", None

    elif "scatter plot of" in command:
        # Parse only the text after the command phrase, so the phrase itself
        # can never end up inside the x column name.
        spec = command.partition("scatter plot of")[2]
        left, separator, right = spec.partition(" vs ")
        x = columns_by_name.get(left.strip())
        y = columns_by_name.get(right.strip())
        if separator and x is not None and y is not None:
            fig = px.scatter(df, x=x, y=y)
            plot_info = {"type": "Scatter Plot", "x": x, "y": y, "data": df[[x, y]].to_json()}
            return df, fig, plot_info
        return df, "Invalid scatter plot command.", None

    elif "histogram of" in command:
        col = columns_by_name.get(command.replace("histogram of", "").strip())
        if col is not None:
            fig = px.histogram(df, x=col)
            plot_info = {"type": "Histogram", "x": col, "data": df[[col]].to_json()}
            return df, fig, plot_info
        return df, "Invalid histogram command.", None

    elif "analyze plot" in command and "last_plot" in st.session_state:
        plot_text = extract_plot_data(st.session_state.last_plot, df)
        st.session_state.vector_store = update_vector_store_with_plot(
            plot_text, st.session_state.get("vector_store")
        )
        return df, plot_text, None

    return df, None, None
+# Main App
_APP_MODES = ["Data Upload", "Data Cleaning", "EDA"]


def _render_metrics(df, container_id, extra_html=""):
    """Render row, column and missing-value counts inside ``<div id=container_id>``."""
    body = (
        '<div class="metrics">'
        f'<div class="metric">Rows: {df.shape[0]}</div>'
        f'<div class="metric">Columns: {df.shape[1]}</div>'
        f'<div class="metric">Missing: {df.isna().sum().sum()}</div>'
        '</div>'
        f'{extra_html}'
    )
    st.markdown(f'<div id="{container_id}">{body}</div>', unsafe_allow_html=True)


def _submit_chat():
    """``on_change`` callback: queue the typed message and clear the input box."""
    st.session_state.pending_chat = st.session_state.chat_input
    # Widget state may be changed here because callbacks run before the
    # widget is instantiated in the next script run.
    st.session_state.chat_input = ""


def _handle_chat(chat_input, model):
    """Run one chat message through ``parse_command`` or the LLM and record the reply."""
    st.session_state.chat_history.append({"role": "user", "content": chat_input})
    df = st.session_state.cleaned_data if 'cleaned_data' in st.session_state else pd.DataFrame()

    # parse_command returns tuples of two or three items depending on the
    # branch taken; pad so unpacking never fails.
    parsed = tuple(parse_command(chat_input, df))
    new_df, result, plot_info = (parsed + (None, None))[:3]

    needs_rerun = False
    if result is not None and isinstance(plot_info, dict):
        # A plot command succeeded: ``result`` is the figure.
        st.plotly_chart(result)
        st.session_state.last_plot = plot_info
        st.session_state.vector_store = update_vector_store_with_plot(
            extract_plot_data(plot_info, new_df), st.session_state.vector_store
        )
        response = f"Generated {plot_info['type'].lower()}."
    else:
        # A status message may arrive in either of the last two slots.
        message = next((item for item in (result, plot_info) if isinstance(item, str)), None)
        if message is not None:
            response = message
            if "Dropped columns" in response:
                st.session_state.cleaned_data = new_df
                st.session_state.vector_store = create_vector_store(convert_df_to_text(new_df))
                needs_rerun = True
        else:
            response = get_chatbot_response(
                chat_input, st.session_state.app_mode, st.session_state.vector_store, model
            )

    # Record the reply before any rerun so it is not lost.
    st.session_state.chat_history.append({"role": "assistant", "content": response})

    # Best-effort notification for the listener in custom_html. "</" is
    # escaped so the reply cannot close the script element early.
    # NOTE(review): Streamlit may not execute inline scripts added through
    # st.markdown - confirm; the history rendered in main() does not rely on it.
    payload = json.dumps(response).replace("</", "<\\/")
    st.markdown(f"""
        <script>
          document.dispatchEvent(new CustomEvent('bot_response', {{ detail: {payload} }}));
        </script>
        """, unsafe_allow_html=True)

    if needs_rerun:
        st.rerun()


def main():
    """Render the Data-Vision Pro page: sidebar, the selected page, and the chat.

    Session-state keys used: ``app_mode``, ``raw_data``, ``cleaned_data``,
    ``dataset_text``, ``vector_store``, ``data_versions``, ``last_plot``,
    ``chat_history``, ``chat_input`` (the text box), ``pending_chat`` (a
    message submitted but not yet processed) and ``upload_key`` (identifies
    the file that has already been ingested).
    """
    import html  # local import: only used to escape the data preview

    # Render Custom HTML
    st.markdown(custom_html, unsafe_allow_html=True)

    # Hidden inputs referenced by the script in custom_html
    if 'app_mode' not in st.session_state:
        st.session_state.app_mode = "Data Upload"
    st.markdown('<input id="app-mode" type="hidden" value="Data Upload">', unsafe_allow_html=True)
    st.markdown('<input id="chat-trigger" type="hidden">', unsafe_allow_html=True)

    # Initialize Session State
    if 'vector_store' not in st.session_state:
        st.session_state.vector_store = None
    if 'chat_history' not in st.session_state:
        st.session_state.chat_history = []

    # Sidebar
    with st.sidebar:
        st.markdown("### 🔮 Data-Vision Pro")
        # Without a selector ``app_mode`` would stay at its initial value and
        # the cleaning and EDA pages could never be reached.
        st.radio("Page", _APP_MODES, key="app_mode")
        model = st.selectbox("Select Groq Model", ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"], index=0)
        if 'cleaned_data' in st.session_state:
            csv = st.session_state.cleaned_data.to_csv(index=False)
            st.download_button(label="Download Cleaned Data", data=csv, file_name='cleaned_data.csv', mime='text/csv')

    # App Logic
    if st.session_state.app_mode == "Data Upload":
        uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
        if uploaded_file:
            # Ingest each upload once. Re-reading it on every rerun would
            # overwrite cleaning edits and rebuild the vector store each time.
            upload_key = (uploaded_file.name, uploaded_file.size)
            if st.session_state.get('upload_key') != upload_key:
                if uploaded_file.name.lower().endswith('.csv'):
                    df = pd.read_csv(uploaded_file)
                else:
                    df = pd.read_excel(uploaded_file)
                st.session_state.raw_data = df
                st.session_state.cleaned_data = df.copy()
                st.session_state.dataset_text = convert_df_to_text(df)
                st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
                # A new file starts a new version history.
                st.session_state.data_versions = [df.copy()]
                st.session_state.upload_key = upload_key
            df = st.session_state.raw_data
            # Cell values come from the uploaded file, so escape them before
            # embedding them in HTML.
            preview = f"<pre>{html.escape(df.head().to_string())}</pre>"
            _render_metrics(df, "upload-output", preview)
            if st.button("Generate Full Profile Report"):
                with st.spinner("Generating report..."):
                    pr = ProfileReport(df, explorative=True)
                    st_profile_report(pr)

    elif st.session_state.app_mode == "Data Cleaning":
        if 'cleaned_data' not in st.session_state:
            st.warning("Please upload data first.")
        else:
            df = st.session_state.cleaned_data
            _render_metrics(df, "cleaning-output")
            cols_to_drop = st.multiselect("Select columns to drop", df.columns)
            if cols_to_drop and st.button("Drop Columns"):
                df = df.drop(columns=cols_to_drop)
                update_cleaned_data(df)
                st.rerun()

    elif st.session_state.app_mode == "EDA":
        if 'cleaned_data' not in st.session_state:
            st.warning("Please upload data first.")
        else:
            df = st.session_state.cleaned_data
            _render_metrics(df, "eda-output")
            plot_type = st.selectbox("Choose visualization type", ["Scatter Plot", "Histogram"])
            x_axis = st.selectbox("X-axis", df.columns)
            y_axis = st.selectbox("Y-axis", df.columns) if plot_type == "Scatter Plot" else None
            if st.button("Generate Plot"):
                fig = None
                if plot_type == "Scatter Plot" and x_axis is not None and y_axis is not None:
                    fig = px.scatter(df, x=x_axis, y=y_axis)
                    st.session_state.last_plot = {"type": "Scatter Plot", "x": x_axis, "y": y_axis, "data": df[[x_axis, y_axis]].to_json()}
                elif plot_type == "Histogram" and x_axis is not None:
                    fig = px.histogram(df, x=x_axis)
                    st.session_state.last_plot = {"type": "Histogram", "x": x_axis, "data": df[[x_axis]].to_json()}
                # ``fig`` stays None when the data has no columns to plot.
                if fig is not None:
                    st.plotly_chart(fig)

    # Chatbot Logic
    # Take the queued message out of session state so it is handled once
    # and not again on later reruns.
    chat_input = st.session_state.pop('pending_chat', '')
    if chat_input:
        _handle_chat(chat_input, model)

    for entry in st.session_state.chat_history:
        speaker = "You" if entry["role"] == "user" else "Assistant"
        st.markdown(f"**{speaker}:** {entry['content']}")

    # The widget owns the ``chat_input`` key. Assigning to that key after the
    # widget exists raises, so the value is read only inside the callback.
    st.text_input("Chat with AI", key="chat_input", on_change=_submit_chat)


if __name__ == "__main__":
    main()