Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Files Community

CosmickVisions committed on Mar 14

Commit

9e431a9

verified ·

1 Parent(s): bc938fb

Update app.py

Browse files

Files changed (1) hide show

app.py +300 -26

app.py CHANGED Viewed

@@ -8,20 +8,14 @@ from streamlit_pandas_profiling import st_profile_report
 import os
 import requests
 import json
-from datetime import datetime
 import re
-import tempfile
 from scipy import stats
-from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
 from sklearn.decomposition import PCA
-import streamlit.components.v1 as components
-from io import StringIO
 from dotenv import load_dotenv
 from flask import Flask, request, jsonify
 from openai import OpenAI
 import threading
-from sentence_transformers import SentenceTransformer
 # Load environment variables
 load_dotenv()
@@ -30,13 +24,6 @@ load_dotenv()
 flask_app = Flask(__name__)
 FLASK_PORT = 5000  # Internal port for Flask, not exposed externally
-# Initialize OpenAI client
-api_key = os.getenv("OPENAI_API_KEY")
-if not api_key:
-    st.error("OPENAI_API_KEY not set. Please configure it in the Hugging Face Space secrets.")
-    st.stop()
-client = OpenAI(api_key=api_key)
 # Flask RAG Endpoint
 @flask_app.route('/rag_chat', methods=['POST'])
 def rag_chat():
@@ -45,7 +32,6 @@ def rag_chat():
     app_mode = data.get('app_mode', 'Data Upload')
     dataset_text = data.get('dataset_text', '')
-    # RAG Logic: Use dataset_text as retrieval context
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
         "The app has three pages:\n"
@@ -71,7 +57,7 @@ def rag_chat():
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": user_input}
             ],
-            max_tokens=100,  # Increased for RAG context
             temperature=0.7
         )
         return jsonify({"response": response.choices[0].message.content})
@@ -82,7 +68,6 @@ def rag_chat():
 def run_flask():
     """Start the Flask app that serves the RAG endpoint (blocking call).

     Used as the target of the background daemon thread created right after
     this definition. The debugger and the auto-reloader are both disabled.
     """
     # NOTE(review): host='0.0.0.0' listens on every interface, while FLASK_PORT
     # is described as internal-only -- confirm whether '127.0.0.1' was intended.
     flask_app.run(host='0.0.0.0', port=FLASK_PORT, debug=False, use_reloader=False)
-# Start Flask thread
 flask_thread = threading.Thread(target=run_flask, daemon=True)
 flask_thread.start()
@@ -95,11 +80,11 @@ def update_cleaned_data(df):
     if 'data_versions' not in st.session_state:
         st.session_state.data_versions = [st.session_state.raw_data.copy()]
     st.session_state.data_versions.append(df.copy())
     st.success("✅ Action completed successfully!")
     st.rerun()
 def convert_csv_to_json_and_text(df):
-    """Convert DataFrame to JSON and then to plain text."""
     json_data = df.to_json(orient="records")
     data_dict = json.loads(json_data)
     text_summary = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
@@ -115,7 +100,6 @@ def convert_csv_to_json_and_text(df):
     return text_summary
 def get_chatbot_response(user_input, app_mode, dataset_text=""):
-    """Send request to internal Flask RAG endpoint."""
     payload = {
         "user_input": user_input,
         "app_mode": app_mode,
@@ -128,8 +112,88 @@ def get_chatbot_response(user_input, app_mode, dataset_text=""):
     except requests.exceptions.RequestException as e:
         return f"Error: Could not connect to RAG server. {str(e)}"
-# Streamlit App
-# Sidebar Navigation
 with st.sidebar:
     st.title("🔮 Data-Vision Pro")
     st.markdown("Your AI-powered data analysis suite with RAG.")
@@ -145,6 +209,13 @@ with st.sidebar:
         st.info("🧹 Clean and preprocess your data using various tools.")
     elif app_mode == "EDA":
         st.info("🔍 Explore your data visually and statistically.")
     st.markdown("---")
     st.markdown("**Note**: Requires dependencies in `requirements.txt`.")
@@ -159,15 +230,29 @@ with st.sidebar:
     st.markdown("Created by Calvin Allen-Crawford")
     st.markdown("v1.0 | © 2025")
 # Main App Pages
 if app_mode == "Data Upload":
     st.title("📤 Data Upload & Profiling")
     st.header("Upload Your Dataset")
     st.write("Supported formats: CSV, XLSX")
     if 'raw_data' not in st.session_state:
         st.info("It looks like no dataset has been uploaded yet. Would you like to upload a CSV or XLSX file?")
     uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"], key="file_uploader")
     if uploaded_file:
         st.session_state.pop('raw_data', None)
@@ -182,6 +267,7 @@ if app_mode == "Data Upload":
                 st.error("Uploaded file is empty.")
                 st.stop()
             st.session_state.raw_data = df
             st.session_state.dataset_text = convert_csv_to_json_and_text(df)
             if 'data_versions' not in st.session_state:
                 st.session_state.data_versions = [df.copy()]
@@ -226,6 +312,92 @@ elif app_mode == "Data Cleaning":
                 st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
                 st.rerun()
 elif app_mode == "EDA":
     st.title("🔍 Interactive Data Explorer")
     if 'cleaned_data' not in st.session_state:
@@ -242,10 +414,109 @@ elif app_mode == "EDA":
         col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
         col4.metric("Duplicates", df.duplicated().sum())
 # Chatbot Section
 st.markdown("---")
 st.subheader("💬 AI Chatbot Assistant (RAG Enabled)")
-st.info("Ask me about the app or your data! Try: 'What can I do here?' or 'What’s in the dataset?'")
 if "chat_history" not in st.session_state:
     st.session_state.chat_history = []
@@ -258,10 +529,13 @@ if user_input:
     st.session_state.chat_history.append({"role": "user", "content": user_input})
     with st.chat_message("user"):
         st.markdown(user_input)
-    with st.spinner("Thinking with RAG..."):
         dataset_text = st.session_state.get("dataset_text", "")
-        response = get_chatbot_response(user_input, app_mode, dataset_text)
         st.session_state.chat_history.append({"role": "assistant", "content": response})
     with st.chat_message("assistant"):
         st.markdown(response)

 import os
 import requests
 import json
 import re
 from scipy import stats
 from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
 from sklearn.decomposition import PCA
 from dotenv import load_dotenv
 from flask import Flask, request, jsonify
 from openai import OpenAI
 import threading
 # Load environment variables
 load_dotenv()
 flask_app = Flask(__name__)
 FLASK_PORT = 5000  # Internal port for Flask, not exposed externally
 # Flask RAG Endpoint
 @flask_app.route('/rag_chat', methods=['POST'])
 def rag_chat():
     app_mode = data.get('app_mode', 'Data Upload')
     dataset_text = data.get('dataset_text', '')
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
         "The app has three pages:\n"
                 {"role": "system", "content": system_prompt},
                 {"role": "user", "content": user_input}
             ],
+            max_tokens=100,
             temperature=0.7
         )
         return jsonify({"response": response.choices[0].message.content})
 def run_flask():
     """Start the Flask app that serves the RAG endpoint (blocking call).

     Used as the target of the background daemon thread created right after
     this definition. The debugger and the auto-reloader are both disabled.
     """
     # NOTE(review): host='0.0.0.0' listens on every interface, while FLASK_PORT
     # is described as internal-only -- confirm whether '127.0.0.1' was intended.
     flask_app.run(host='0.0.0.0', port=FLASK_PORT, debug=False, use_reloader=False)
 flask_thread = threading.Thread(target=run_flask, daemon=True)
 flask_thread.start()
     if 'data_versions' not in st.session_state:
         st.session_state.data_versions = [st.session_state.raw_data.copy()]
     st.session_state.data_versions.append(df.copy())
+    st.session_state.dataset_text = convert_csv_to_json_and_text(df)
     st.success("✅ Action completed successfully!")
     st.rerun()
 def convert_csv_to_json_and_text(df):
     json_data = df.to_json(orient="records")
     data_dict = json.loads(json_data)
     text_summary = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
     return text_summary
 def get_chatbot_response(user_input, app_mode, dataset_text=""):
     payload = {
         "user_input": user_input,
         "app_mode": app_mode,
     except requests.exceptions.RequestException as e:
         return f"Error: Could not connect to RAG server. {str(e)}"
+# Command Functions for LLM
def drop_columns(columns):
    """Drop the named columns from the cleaned dataset in session state.

    Args:
        columns: Comma-separated column names, e.g. ``"age, income"``.
            Each name is matched exactly first and then case-insensitively,
            because chat commands may arrive in a different case than the
            dataset's column labels. Blank entries and repeats are ignored.

    Returns:
        A status message: the dropped columns, a notice that no requested
        column exists, or a notice that no dataset is loaded.
    """
    if 'cleaned_data' not in st.session_state:
        return "No dataset loaded."

    df = st.session_state.cleaned_data.copy()
    # Fallback lookup from lowercased label to the real column label.
    by_lowercase = {str(col).lower(): col for col in df.columns}

    valid_columns = []
    for raw_name in (columns or "").split(','):
        name = raw_name.strip()
        if not name:
            continue
        actual = name if name in df.columns else by_lowercase.get(name.lower())
        if actual is not None and actual not in valid_columns:
            valid_columns.append(actual)

    if not valid_columns:
        return "No valid columns found to drop."

    df.drop(valid_columns, axis=1, inplace=True)
    # NOTE(review): update_cleaned_data appears to end with st.rerun(); if so,
    # the message below may never reach the chat history -- confirm.
    update_cleaned_data(df)
    return f"Dropped columns: {', '.join(map(str, valid_columns))}"
+# LLM-Driven EDA Commands
def generate_scatter_plot(params):
    """Render a scatter plot described by a ``"<x> vs <y>"`` string.

    On success the figure is drawn with Streamlit and a description of the
    plot (type, axes, plotted data as JSON) is stored in
    ``st.session_state.last_plot``.

    Args:
        params: Text such as ``"age vs income"``. Column names are matched
            exactly first, then case-insensitively.

    Returns:
        A status message describing what was plotted, or why nothing was.
    """
    # Mirror drop_columns: report a missing dataset instead of raising.
    if 'cleaned_data' not in st.session_state:
        return "No dataset loaded."
    df = st.session_state.cleaned_data

    match = re.search(r"([\w\s]+)\s+vs\s+([\w\s]+)", params or "", re.IGNORECASE)
    if not match:
        return "Invalid columns for scatter plot."

    # Fallback lookup from lowercased label to the real column label.
    by_lowercase = {str(col).lower(): col for col in df.columns}

    def resolve(name):
        name = name.strip()
        return name if name in df.columns else by_lowercase.get(name.lower())

    x_axis, y_axis = resolve(match.group(1)), resolve(match.group(2))
    if x_axis is None or y_axis is None:
        return "Invalid columns for scatter plot."

    fig = px.scatter(df, x=x_axis, y=y_axis, title=f'Scatter Plot of {x_axis} vs {y_axis}')
    st.plotly_chart(fig)
    st.session_state.last_plot = {
        "type": "Scatter Plot",
        "x": x_axis,
        "y": y_axis,
        "data": df[[x_axis, y_axis]].to_json(),
    }
    return f"Generated scatter plot of {x_axis} vs {y_axis}"
def generate_histogram(params):
    """Render a histogram of a single column.

    On success the figure is drawn with Streamlit and a description of the
    plot (type, axis, plotted data as JSON) is stored in
    ``st.session_state.last_plot``.

    Args:
        params: The column name. It is matched exactly first, then
            case-insensitively.

    Returns:
        A status message describing what was plotted, or why nothing was.
    """
    # Mirror drop_columns: report a missing dataset instead of raising.
    if 'cleaned_data' not in st.session_state:
        return "No dataset loaded."
    df = st.session_state.cleaned_data

    requested = (params or "").strip()
    if requested in df.columns:
        x_axis = requested
    else:
        # Fallback lookup from lowercased label to the real column label.
        by_lowercase = {str(col).lower(): col for col in df.columns}
        x_axis = by_lowercase.get(requested.lower())
    if x_axis is None:
        return "Invalid column for histogram."

    fig = px.histogram(df, x=x_axis, title=f'Histogram of {x_axis}')
    st.plotly_chart(fig)
    st.session_state.last_plot = {
        "type": "Histogram",
        "x": x_axis,
        "data": df[[x_axis]].to_json(),
    }
    return f"Generated histogram of {x_axis}"
+# Inference from Plotted Data
def analyze_plot():
    """Summarize the most recently recorded plot in one sentence.

    Reads ``st.session_state.last_plot`` (keys ``type``, ``x``, ``data`` and
    optionally ``y``), rebuilds the plotted data from its JSON form, and
    reports the Pearson correlation for scatter plots or the skewness for
    histograms.

    Returns:
        A human-readable finding, or a message explaining why no analysis
        is available.
    """
    from io import StringIO

    if "last_plot" not in st.session_state:
        return "No plot available to analyze."

    plot_info = st.session_state.last_plot
    # Wrap the JSON text in a buffer: passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    df = pd.read_json(StringIO(plot_info["data"]))
    plot_type = plot_info["type"]
    x_col = plot_info["x"]
    y_col = plot_info.get("y")

    if plot_type == "Scatter Plot" and y_col:
        # Coerce so text columns yield NaN instead of raising inside corr().
        x_values = pd.to_numeric(df[x_col], errors="coerce")
        y_values = pd.to_numeric(df[y_col], errors="coerce")
        correlation = x_values.corr(y_values)
        if pd.isna(correlation):
            return (
                f"Cannot compute a correlation for {x_col} vs {y_col}: "
                "both columns need numeric, non-constant data."
            )
        if correlation == 0:
            # A zero coefficient has no direction to report.
            return f"The scatter plot of {x_col} vs {y_col} shows no linear correlation (Pearson r = {correlation:.2f})."
        magnitude = abs(correlation)
        if magnitude > 0.7:
            strength = "strong"
        elif magnitude > 0.3:
            strength = "moderate"
        else:
            strength = "weak"
        direction = "positive" if correlation > 0 else "negative"
        return f"The scatter plot of {x_col} vs {y_col} shows a {strength} {direction} correlation (Pearson r = {correlation:.2f})."

    if plot_type == "Histogram":
        skewness = pd.to_numeric(df[x_col], errors="coerce").skew()
        if pd.isna(skewness):
            return f"Cannot compute skewness for {x_col}: the column needs numeric data."
        if skewness > 1:
            skew_desc = "positively skewed"
        elif skewness < -1:
            skew_desc = "negatively skewed"
        else:
            skew_desc = "approximately symmetric"
        return f"The histogram of {x_col} is {skew_desc} (skewness = {skewness:.2f})."

    return "Inference not available for this plot type."
+# Parse Chatbot Commands
def parse_command(command):
    """Map a chat message to a command handler and its argument text.

    Keywords are recognized case-insensitively, but the argument text keeps
    the user's original casing so column names are not altered. Only the text
    after the keyword phrase is passed on, so a leading "please" or a missing
    "of" does not end up in the argument.

    Args:
        command: The raw chat message; ``None`` is treated as empty.

    Returns:
        ``(handler, argument)`` when a command is recognized. ``argument`` is
        ``None`` for "analyze plot". Otherwise ``(None, help_message)``.
    """
    text = (command or "").strip()

    def remainder_after(pattern):
        # Text following the first occurrence of ``pattern``, or None if absent.
        found = re.search(pattern, text, re.IGNORECASE)
        return None if found is None else text[found.end():].strip()

    # Branch order matches the original precedence: drop, scatter, histogram,
    # analyze.
    columns = remainder_after(r"drop\s+columns?")
    if columns is not None:
        return drop_columns, columns

    params = remainder_after(r"show\s+a\s+scatter\s+plot(?:\s+of\b)?|scatter\s+plot\s+of\b")
    if params is not None:
        return generate_scatter_plot, params

    params = remainder_after(r"show\s+a\s+histogram(?:\s+of\b)?|histogram\s+of\b")
    if params is not None:
        return generate_histogram, params

    if re.search(r"analyze\s+plot", text, re.IGNORECASE):
        # The caller always passes one argument; analyze_plot takes none.
        return lambda x: analyze_plot(), None

    return None, "Command not recognized. Try 'drop columns X, Y', 'scatter plot of X vs Y', or 'analyze plot'."
+# Dataset Preview Function
def display_dataset_preview():
    """Show the first ten rows of the cleaned dataset, if one is loaded.

    Does nothing when ``cleaned_data`` is not in the session state.
    """
    if 'cleaned_data' not in st.session_state:
        return
    st.subheader("Current Dataset Preview")
    preview_rows = st.session_state.cleaned_data.head(10)
    st.dataframe(preview_rows, use_container_width=True)
    st.write("---")
+# Sidebar Navigation with API Key Input
 with st.sidebar:
     st.title("🔮 Data-Vision Pro")
     st.markdown("Your AI-powered data analysis suite with RAG.")
         st.info("🧹 Clean and preprocess your data using various tools.")
     elif app_mode == "EDA":
         st.info("🔍 Explore your data visually and statistically.")
+    # API Key Input Field
+    api_key_input = st.text_input(
+        "Enter your API key (optional)",
+        type="password",
+        help="Enter your API key to override the default. Leave blank to use the app's default key."
+    )
     st.markdown("---")
     st.markdown("**Note**: Requires dependencies in `requirements.txt`.")
     st.markdown("Created by Calvin Allen-Crawford")
     st.markdown("v1.0 | © 2025")
+# Determine which API key to use
# Key precedence: sidebar input, then Streamlit secrets, then the environment.
# A whitespace-only sidebar entry counts as "not provided".
api_key = api_key_input.strip() if api_key_input else ""
if not api_key:
    try:
        api_key = st.secrets.get("OPENAI_API_KEY")
    except FileNotFoundError:
        # NOTE(review): st.secrets is understood to raise FileNotFoundError (or a
        # subclass, depending on the Streamlit version) when no secrets file
        # exists -- confirm for the deployed version. Without this guard the
        # environment-variable fallback is never reached.
        api_key = None
    api_key = api_key or os.getenv("OPENAI_API_KEY")
if not api_key:
    st.error("API key is required. Please provide it in the sidebar or ensure it’s set in the app’s secrets.")
    st.stop()
# Initialize OpenAI client with the selected API key
client = OpenAI(api_key=api_key)
+# Display dataset preview at the top of each page
+display_dataset_preview()
 # Main App Pages
 if app_mode == "Data Upload":
     st.title("📤 Data Upload & Profiling")
     st.header("Upload Your Dataset")
     st.write("Supported formats: CSV, XLSX")
     if 'raw_data' not in st.session_state:
         st.info("It looks like no dataset has been uploaded yet. Would you like to upload a CSV or XLSX file?")
     uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"], key="file_uploader")
     if uploaded_file:
         st.session_state.pop('raw_data', None)
                 st.error("Uploaded file is empty.")
                 st.stop()
             st.session_state.raw_data = df
+            st.session_state.cleaned_data = df.copy()
             st.session_state.dataset_text = convert_csv_to_json_and_text(df)
             if 'data_versions' not in st.session_state:
                 st.session_state.data_versions = [df.copy()]
                 st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
                 st.rerun()
+    with st.expander("🛠️ Data Cleaning Operations", expanded=True):
+        enhance_section_title("🔍 Missing Values Treatment")
+        missing_cols = df.columns[df.isna().any()].tolist()
+        if missing_cols:
+            cols = st.multiselect("Select columns with missing values", missing_cols)
+            method = st.selectbox("Choose imputation method", [
+                "Drop Missing Values", "Fill with Mean/Median", "Fill with Custom Value", "Forward Fill", "Backward Fill"
+            ])
+            if method == "Fill with Custom Value":
+                custom_val = st.text_input("Enter custom value:")
+            if st.button("Apply Missing Value Treatment"):
+                new_df = df.copy()
+                if method == "Drop Missing Values":
+                    new_df = new_df.dropna(subset=cols)
+                elif method == "Fill with Mean/Median":
+                    for col in cols:
+                        if pd.api.types.is_numeric_dtype(new_df[col]):
+                            new_df[col] = new_df[col].fillna(new_df[col].median())
+                        else:
+                            new_df[col] = new_df[col].fillna(new_df[col].mode()[0])
+                elif method == "Fill with Custom Value" and custom_val:
+                    new_df[cols] = new_df[cols].fillna(custom_val)
+                elif method == "Forward Fill":
+                    new_df[cols] = new_df[cols].ffill()
+                elif method == "Backward Fill":
+                    new_df[cols] = new_df[cols].bfill()
+                update_cleaned_data(new_df)
+        else:
+            st.success("✨ No missing values detected!")
+        enhance_section_title("🔄 Data Type Conversion")
+        col_to_convert = st.selectbox("Select column to convert", df.columns)
+        new_type = st.selectbox("Select new data type", ["String", "Integer", "Float", "Boolean", "Datetime"])
+        if new_type == "Datetime":
+            date_format = st.text_input("Enter date format (e.g., %Y-%m-%d):", "%Y-%m-%d")
+        if st.button("Convert Data Type"):
+            new_df = df.copy()
+            if new_type == "String":
+                new_df[col_to_convert] = new_df[col_to_convert].astype(str)
+            elif new_type == "Integer":
+                new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce').astype('Int64')
+            elif new_type == "Float":
+                new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce')
+            elif new_type == "Boolean":
+                new_df[col_to_convert] = new_df[col_to_convert].astype(bool)
+            elif new_type == "Datetime":
+                new_df[col_to_convert] = pd.to_datetime(new_df[col_to_convert], format=date_format, errors='coerce')
+            update_cleaned_data(new_df)
+        enhance_section_title("🗑️ Drop Columns")
+        columns_to_drop = st.multiselect("Select columns to remove", df.columns)
+        if columns_to_drop and st.button("Confirm Column Removal"):
+            new_df = df.copy()
+            new_df = new_df.drop(columns=columns_to_drop)
+            update_cleaned_data(new_df)
+        enhance_section_title("🔢 Encoding Options")
+        encoding_method = st.radio("Choose encoding method", ("Label Encoding", "One-Hot Encoding"))
+        data_to_encode = st.multiselect("Select columns to encode", df.select_dtypes(include='object').columns)
+        if data_to_encode and st.button("Apply Encoding"):
+            new_df = df.copy()
+            if encoding_method == "Label Encoding":
+                for col in data_to_encode:
+                    le = LabelEncoder()
+                    new_df[col] = le.fit_transform(new_df[col].astype(str))
+            elif encoding_method == "One-Hot Encoding":
+                new_df = pd.get_dummies(new_df, columns=data_to_encode, drop_first=True, dtype=int)
+            update_cleaned_data(new_df)
+        enhance_section_title("📏 StandardScaler")
+        scale_cols = st.multiselect("Select numerical columns to scale", df.select_dtypes(include=np.number).columns)
+        if scale_cols and st.button("Apply StandardScaler"):
+            new_df = df.copy()
+            scaler = StandardScaler()
+            new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
+            update_cleaned_data(new_df)
+        enhance_section_title("🕵️ Pattern-Based Cleaning")
+        selected_col = st.selectbox("Select text column for pattern cleaning", df.select_dtypes(include='object').columns)
+        pattern = st.text_input("Enter regex pattern:")
+        replacement = st.text_input("Enter replacement value:")
+        if st.button("Apply Pattern Replacement"):
+            new_df = df.copy()
+            new_df[selected_col] = new_df[selected_col].str.replace(pattern, replacement, regex=True)
+            update_cleaned_data(new_df)
 elif app_mode == "EDA":
     st.title("🔍 Interactive Data Explorer")
     if 'cleaned_data' not in st.session_state:
         col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
         col4.metric("Duplicates", df.duplicated().sum())
+    tab1, tab2, tab3 = st.tabs(["Quick Preview", "Column Types", "Missing Matrix"])
+    with tab1:
+        st.write("First few rows of the dataset:")
+        st.dataframe(df.head(), use_container_width=True)
+    with tab2:
+        st.write("Column Data Types:")
+        type_counts = df.dtypes.value_counts().reset_index()
+        type_counts.columns = ['Type', 'Count']
+        st.dataframe(type_counts, use_container_width=True)
+    with tab3:
+        st.write("Missing Values Matrix:")
+        fig_missing = px.imshow(df.isna(), color_continuous_scale=['#e0e0e0', '#66c2a5'])
+        fig_missing.update_layout(coloraxis_colorscale=[[0, 'lightgrey'], [1, '#FF4B4B']])
+        st.plotly_chart(fig_missing, use_container_width=True)
+    enhance_section_title("Interactive Visualization Builder")
+    with st.container():
+        col1, col2 = st.columns([1, 3])
+        with col1:
+            plot_type = st.selectbox("Choose visualization type", [
+                "Scatter Plot", "Histogram", "Box Plot", "Violin Plot", "Line Chart", "Bar Chart",
+                "Correlation Matrix", "Heatmap", "3D Scatter", "Parallel Categories", "Segmented Bar Chart",
+                "Swarm Plot", "Ridge Plot", "Bubble Plot", "Density Plot", "Count Plot", "Lollipop Chart"
+            ])
+            x_axis = st.selectbox("X-axis", df.columns) if plot_type != "Correlation Matrix" else None
+            y_axis = st.selectbox("Y-axis", df.columns) if plot_type in ["Scatter Plot", "Box Plot", "Violin Plot", "Line Chart", "Heatmap", "Swarm Plot", "Ridge Plot", "Bubble Plot", "Density Plot", "Lollipop Chart"] else None
+            z_axis = st.selectbox("Z-axis", df.columns) if plot_type == "3D Scatter" else None
+            color_by = st.selectbox("Color encoding", ["None"] + df.columns.tolist(), format_func=lambda x: "No color" if x == "None" else x) if plot_type != "Correlation Matrix" else None
+            if plot_type == "Parallel Categories":
+                dimensions = st.multiselect("Dimensions", df.columns.tolist(), default=df.columns[:3].tolist())
+            elif plot_type == "Segmented Bar Chart":
+                segment_col = st.selectbox("Segment Column (Categorical)", df.select_dtypes(exclude=np.number).columns)
+            elif plot_type == "Bubble Plot":
+                size_col = st.selectbox("Size Column", df.columns)
+        with col2:
+            try:
+                fig = None
+                if plot_type == "Scatter Plot" and x_axis and y_axis:
+                    fig = px.scatter(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, trendline="lowess", title=f'Scatter Plot of {x_axis} vs {y_axis}')
+                elif plot_type == "Histogram" and x_axis:
+                    fig = px.histogram(df, x=x_axis, color=color_by if color_by != "None" else None, nbins=30, marginal="box", title=f'Histogram of {x_axis}')
+                elif plot_type == "Box Plot" and x_axis and y_axis:
+                    fig = px.box(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Box Plot of {x_axis} vs {y_axis}')
+                elif plot_type == "Violin Plot" and x_axis and y_axis:
+                    fig = px.violin(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, box=True, title=f'Violin Plot of {x_axis} vs {y_axis}')
+                elif plot_type == "Line Chart" and x_axis and y_axis:
+                    fig = px.line(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Line Chart of {x_axis} vs {y_axis}')
+                elif plot_type == "Bar Chart" and x_axis:
+                    fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None, title=f'Bar Chart of {x_axis}')
+                elif plot_type == "Correlation Matrix":
+                    numeric_df = df.select_dtypes(include=np.number)
+                    if len(numeric_df.columns) > 1:
+                        corr = numeric_df.corr()
+                        fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1, title='Correlation Matrix')
+                elif plot_type == "Heatmap" and x_axis and y_axis:
+                    fig = px.density_heatmap(df, x=x_axis, y=y_axis, facet_col=color_by if color_by != "None" else None, title=f'Heatmap of {x_axis} vs {y_axis}')
+                elif plot_type == "3D Scatter" and x_axis and y_axis and z_axis:
+                    fig = px.scatter_3d(df, x=x_axis, y=y_axis, z=z_axis, color=color_by if color_by != "None" else None, title=f'3D Scatter Plot of {x_axis} vs {y_axis} vs {z_axis}')
+                elif plot_type == "Parallel Categories" and dimensions:
+                    fig = px.parallel_categories(df, dimensions=dimensions, color=color_by if color_by != "None" else None, title='Parallel Categories Plot')
+                elif plot_type == "Segmented Bar Chart" and x_axis and segment_col:
+                    segment_counts = df.groupby([x_axis, segment_col]).size().reset_index(name='counts')
+                    fig = px.bar(segment_counts, x=x_axis, y='counts', color=segment_col, title=f'Segmented Bar Chart of {x_axis} by {segment_col}')
+                    fig.update_layout(yaxis_title="Count")
+                elif plot_type == "Swarm Plot" and x_axis and y_axis:
+                    fig = px.strip(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Swarm Plot of {x_axis} vs {y_axis}')
+                elif plot_type == "Ridge Plot" and x_axis and y_axis:
+                    fig = px.histogram(df, x=x_axis, color=y_axis, marginal="rug", title=f'Ridge Plot of {x_axis} by {y_axis}')
+                elif plot_type == "Bubble Plot" and x_axis and y_axis and size_col:
+                    fig = px.scatter(df, x=x_axis, y=y_axis, size=size_col, color=color_by if color_by != "None" else None, title=f'Bubble Plot of {x_axis} vs {y_axis}')
+                elif plot_type == "Density Plot" and x_axis and y_axis:
+                    fig = px.density_heatmap(df, x=x_axis, y=y_axis, color_continuous_scale="Viridis", title=f'Density Plot of {x_axis} vs {y_axis}')
+                elif plot_type == "Count Plot" and x_axis:
+                    fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None, title=f'Count Plot of {x_axis}')
+                    fig.update_layout(yaxis_title="Count")
+                elif plot_type == "Lollipop Chart" and x_axis and y_axis:
+                    fig = go.Figure()
+                    fig.add_trace(go.Scatter(x=df[x_axis], y=df[y_axis], mode='markers', marker=dict(size=10)))
+                    for i in range(len(df)):
+                        fig.add_trace(go.Scatter(x=[df[x_axis].iloc[i], df[x_axis].iloc[i]], y=[0, df[y_axis].iloc[i]], mode='lines', line=dict(color='gray')))
+                    fig.update_layout(showlegend=False, title=f'Lollipop Chart of {x_axis} vs {y_axis}')
+                if fig:
+                    fig.update_layout(template="plotly_white")
+                    st.plotly_chart(fig, use_container_width=True)
+                    st.session_state.last_plot = {
+                        "type": plot_type,
+                        "x": x_axis,
+                        "y": y_axis,
+                        "z": z_axis,
+                        "color": color_by if color_by != "None" else None,
+                        "data": df[[x_axis, y_axis] + ([z_axis] if z_axis else [])].to_json() if x_axis and y_axis else df[[x_axis]].to_json()
+                    }
+                else:
+                    st.error("Please provide required inputs for the selected plot type.")
+            except Exception as e:
+                st.error(f"Couldn't create visualization: {str(e)}")
 # Chatbot Section
 st.markdown("---")
 st.subheader("💬 AI Chatbot Assistant (RAG Enabled)")
+st.info("Ask me about the app or your data! Try: 'drop columns X, Y', 'scatter plot of X vs Y', or 'analyze plot'")
 if "chat_history" not in st.session_state:
     st.session_state.chat_history = []
     st.session_state.chat_history.append({"role": "user", "content": user_input})
     with st.chat_message("user"):
         st.markdown(user_input)
+    with st.spinner("Processing..."):
         dataset_text = st.session_state.get("dataset_text", "")
+        func, param = parse_command(user_input)
+        if func:
+            response = func(param) if param else func(None)
+        else:
+            response = get_chatbot_response(user_input, app_mode, dataset_text)
         st.session_state.chat_history.append({"role": "assistant", "content": response})
     with st.chat_message("assistant"):
         st.markdown(response)