Spaces:

CosmickVisions
/

Neural-Vision

Sleeping

App Files Community

CosmickVisions committed on Mar 14

Commit

5842911

verified ·

1 Parent(s): 5b73495

Update app.py

Browse files

Files changed (1) hide show

app.py +238 -231

app.py CHANGED Viewed

@@ -1,252 +1,259 @@
 import streamlit as st
 import pandas as pd
-import plotly.express as px
 import numpy as np
-from pycaret.classification import *
-from pycaret.regression import *
-from pycaret.clustering import *
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
-import mlflow
 import requests
 import json
 import os
-# Set page config
-st.set_page_config(page_title="Neural-Vision Enhanced", layout="wide")
-# MLflow Tracking
-mlflow.set_tracking_uri("http://127.0.0.1:5000")
-mlflow.set_experiment("Neural-Vision Enhanced")
-# Initialize session state
-if 'metrics' not in st.session_state:
-    st.session_state.metrics = {}
-if 'chat_history' not in st.session_state:
-    st.session_state.chat_history = []
-# Enhanced Visualization Functions
-def visualize_classification():
-    col1, col2 = st.columns(2)
-    with col1:
-        plot_model(st.session_state.best_model, plot='confusion_matrix', display_format='streamlit')
-    with col2:
-        plot_model(st.session_state.best_model, plot='auc', display_format='streamlit')
-    col3, col4 = st.columns(2)
-    with col3:
-        plot_model(st.session_state.best_model, plot='feature', display_format='streamlit')
-    with col4:
-        plot_model(st.session_state.best_model, plot='pr', display_format='streamlit')
-def visualize_regression():
-    col1, col2 = st.columns(2)
-    with col1:
-        plot_model(st.session_state.best_model, plot='residuals', display_format='streamlit')
-    with col2:
-        plot_model(st.session_state.best_model, plot='error', display_format='streamlit')
-    col3, col4 = st.columns(2)
-    with col3:
-        plot_model(st.session_state.best_model, plot='cooks', display_format='streamlit')
-    with col4:
-        plot_model(st.session_state.best_model, plot='learning', display_format='streamlit')
-def visualize_clustering():
-    col1, col2 = st.columns(2)
-    with col1:
-        plot_model(st.session_state.best_model, plot='cluster', display_format='streamlit')
-    with col2:
-        plot_model(st.session_state.best_model, plot='distribution', display_format='streamlit')
-    col3, col4 = st.columns(2)
-    with col3:
-        plot_model(st.session_state.best_model, plot='elbow', display_format='streamlit')
-    with col4:
-        plot_model(st.session_state.best_model, plot='silhouette', display_format='streamlit')
-# Enhanced Context Generator
-def get_app_context():
-    context = {
-        "current_state": {
-            "active_page": st.session_state.get('active_page', 'Data Upload'),
-            "dataset_stats": {},
-            "model_metrics": st.session_state.metrics,
-            "problem_type": st.session_state.get('problem_type'),
-            "target": st.session_state.get('target'),
-            "best_model": str(st.session_state.get('best_model', None))
-        },
-        "app_capabilities": [
-            "CSV data upload and statistical analysis",
-            "Automated EDA report generation",
-            "PyCaret-powered model training for classification, regression, and clustering",
-            "Advanced model evaluation visualizations",
-            "ML experiment tracking with MLflow",
-            "AI-powered analysis through DeepSeek integration"
-        ]
-    }
-    if 'df' in st.session_state:
-        df = st.session_state.df
-        context["current_state"]["dataset_stats"] = {
-            "rows": df.shape[0],
-            "columns": df.shape[1],
-            "missing_values": df.isna().sum().sum(),
-            "columns": {col: str(df[col].dtype) for col in df.columns}
-        }
-    return json.dumps(context)
-# Chatbot Handler
-def handle_ai_query(prompt):
     try:
-        response = requests.post(
-            "http://127.0.0.1:5001/analyze",
-            json={
-                "prompt": prompt,
-                "context": get_app_context(),
-                "metrics": st.session_state.metrics
-            }
         )
-        return response.json().get("analysis", "Error in analysis")
     except Exception as e:
-        return f"Analysis error: {str(e)}"
-# Main App Components
-def data_upload_page():
-    st.title("📤 Data Upload & Analysis")
-    uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
-    if uploaded_file:
-        df = pd.read_csv(uploaded_file)
-        st.session_state.df = df
-        st.session_state.metrics = {}
-        st.subheader("Dataset Health Check")
-        col1, col2, col3 = st.columns(3)
-        col1.metric("Total Samples", df.shape[0])
-        col2.metric("Features", df.shape[1])
-        col3.metric("Missing Values", df.isna().sum().sum())
-        if st.button("Generate Full Profile Report"):
-            with st.spinner("Generating report..."):
-                pr = ProfileReport(df, explorative=True)
-                st_profile_report(pr)
-def model_training_page():
-    st.title("🧠 Model Training Studio")
-    if 'df' not in st.session_state:
-        st.warning("Upload data first!")
-        return
-    df = st.session_state.df
-    problem_type = st.selectbox("Select Problem Type",
-                              ["Classification", "Regression", "Clustering"])
-    if problem_type != "Clustering":
-        target = st.selectbox("Select Target Variable", df.columns)
-        st.session_state.target = target
-    if st.button("Initialize Training Environment"):
-        with st.spinner("Configuring PyCaret..."):
-            if problem_type == "Classification":
-                classification_setup(df, target=target, session_id=42)
-            elif problem_type == "Regression":
-                regression_setup(df, target=target, session_id=42)
             else:
-                clustering_setup(df, session_id=42)
-            st.session_state.problem_type = problem_type
-            st.success("Environment ready for modeling!")
-    if 'problem_type' in st.session_state:
-        st.subheader("Model Training Dashboard")
-        if st.session_state.problem_type in ["Classification", "Regression"]:
-            compare_models = st.checkbox("Compare Multiple Models", True)
-            n_models = st.slider("Number of Models", 1, 15, 5) if compare_models else 1
-            if st.button("Start Training"):
-                with st.spinner("Training in progress..."):
-                    if compare_models:
-                        models = compare_models(n_select=n_models)
-                        st.session_state.best_model = models[0]
-                    else:
-                        st.session_state.best_model = create_model()
-                    # Capture metrics
-                    results = pull()
-                    st.session_state.metrics = results.to_dict()
-                    st.success(f"Best Model: {st.session_state.best_model}")
-                    # Log to MLflow
-                    with mlflow.start_run():
-                        mlflow.log_metrics(results.iloc[0].to_dict())
-                        mlflow.sklearn.log_model(st.session_state.best_model, "model")
-def visualization_page():
-    st.title("🔍 Model Evaluation Center")
-    if 'best_model' not in st.session_state:
-        st.warning("Train a model first!")
-        return
-    st.subheader("Performance Analysis")
-    if st.session_state.problem_type == "Classification":
-        visualize_classification()
-    elif st.session_state.problem_type == "Regression":
-        visualize_regression()
-    else:
-        visualize_clustering()
-    st.subheader("Metric Analysis")
-    st.dataframe(pd.DataFrame.from_dict(st.session_state.metrics))
-    if st.button("Request AI Analysis"):
-        analysis = handle_ai_query("Analyze these model metrics")
-        st.markdown(f"**AI Analysis:**\n\n{analysis}")
-# Chatbot Interface
-def ai_assistant():
-    st.markdown("---")
-    st.subheader("🧠 Neural Insight Assistant")
-    for msg in st.session_state.chat_history:
-        st.chat_message(msg["role"]).write(msg["content"])
-    if prompt := st.chat_input("Ask about models, data, or app usage"):
-        st.session_state.chat_history.append({"role": "user", "content": prompt})
-        st.chat_message("user").write(prompt)
-        response = handle_ai_query(prompt)
-        st.session_state.chat_history.append({"role": "assistant", "content": response})
-        st.chat_message("assistant").write(response)
-# App Layout
-with st.sidebar:
-    st.title("🔮 Neural-Vision Enhanced")
-    page = st.selectbox("Navigation", [
-        "Data Upload & Analysis",
-        "Model Training Studio",
-        "Model Evaluation Center"
-    ])
-    st.session_state.active_page = page
-    st.markdown("---")
-    st.markdown("**DeepSeek API Key**")
-    os.environ["DEEPSEEK_API_KEY"] = st.text_input(
-        "Enter API Key:", type="password",
-        help="Required for AI analysis features"
-    )
     st.markdown("---")
-    st.markdown("v4.0 | © 2025 Neural-Vision")
-# Page Routing
-if "Data Upload & Analysis" in page:
-    data_upload_page()
-elif "Model Training Studio" in page:
-    model_training_page()
-else:
-    visualization_page()
-ai_assistant()

+# app_combined.py
 import streamlit as st
 import pandas as pd
 import numpy as np
+import plotly.express as px
+import plotly.graph_objects as go
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
 import requests
 import json
+from datetime import datetime
+import re
+import tempfile
+from scipy import stats
+from sklearn.impute import SimpleImputer
+from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
+from sklearn.decomposition import PCA
+import streamlit.components.v1 as components
+from io import StringIO
+from dotenv import load_dotenv
+from flask import Flask, request, jsonify
+from flask_cors import CORS
+import openai
 import os
+# Load environment variables
+# (must run before the os.getenv() lookup below so DEEPSEEK_API_KEY is visible)
+load_dotenv()
+# Flask server setup
+# `app` hosts the /analyze endpoint; CORS(app) is applied with the extension's defaults.
+app = Flask(__name__)
+CORS(app)
+# Configure DeepSeek API
+# The openai client is pointed at DeepSeek's endpoint instead of the default base URL.
+# NOTE(review): module-level api_key/api_base assignment looks like the pre-1.0
+# openai SDK style -- confirm the pinned openai version supports it.
+openai.api_key = os.getenv("DEEPSEEK_API_KEY")
+openai.api_base = "https://api.deepseek.com/v1"
+# System prompt for the AI assistant
+# This is a str.format() template: {dataset_stats}, {problem_type}, {metrics} and
+# {active_page} must all be supplied when it is formatted, otherwise format()
+# raises KeyError.
+SYSTEM_PROMPT = '''
+You are Neural Analyst, an AI assistant for the Neural-Vision Enhanced analytics platform.
+Your capabilities include:
+1. Explaining model metrics and evaluation visualizations
+2. Interpreting dataset statistics and EDA reports
+3. Guiding users through app functionality
+4. Providing data science insights
+5. Comparing different model performances
+Always consider:
+- Current dataset statistics: {dataset_stats}
+- Active problem type: {problem_type}
+- Model metrics: {metrics}
+- App state: {active_page}
+'''
+@app.route('/analyze', methods=['POST'])
+def analyze():
+    """Answer a chat query by forwarding it, with app context, to DeepSeek.
+
+    Request JSON fields:
+        prompt: the user's question. The legacy field ``user_input`` is
+            accepted as a fallback; in that case ``dataset_text`` (if sent)
+            is appended to the prompt.
+        context: optional; a JSON string (or already-decoded object) holding
+            a ``current_state`` mapping with ``active_page``, ``problem_type``,
+            ``target``, ``dataset_stats`` and ``model_metrics``.
+        app_mode: optional legacy field, used as the active page when the
+            context does not provide one.
+
+    Returns:
+        200 with ``{"analysis": text, "response": text}`` (both keys carry the
+        same text so either client convention works), 400 with
+        ``{"error": ...}`` when no prompt was supplied, or 500 with
+        ``{"error": ...}`` on any other failure.
+    """
+    try:
+        # silent=True yields None instead of raising on a missing/invalid JSON body.
+        data = request.get_json(silent=True) or {}
+        user_query = data.get('prompt')
+        dataset_text = ''
+        if not user_query:
+            # Legacy payload shape: the question and dataset summary arrive separately.
+            user_query = data.get('user_input')
+            dataset_text = data.get('dataset_text') or ''
+        if not user_query:
+            return jsonify({"error": "Missing 'prompt' in request body"}), 400
+        raw_context = data.get('context') or {}
+        context = json.loads(raw_context) if isinstance(raw_context, str) else raw_context
+        state = context.get('current_state') or {}
+        dataset_stats = state.get('dataset_stats') or {}
+        # The context carries "model_metrics"; the prompt template names it "metrics".
+        metrics = state.get('model_metrics', state.get('metrics', {}))
+        active_page = state.get('active_page') or data.get('app_mode')
+        problem_type = state.get('problem_type')
+        # "columns" may be a count or a per-column mapping; report a count either way.
+        column_count = dataset_stats.get('columns', 0)
+        if isinstance(column_count, (dict, list)):
+            column_count = len(column_count)
+        # Construct the prompt for DeepSeek
+        prompt_lines = [
+            f"User Query: {user_query}",
+            "Current Context:",
+            f"- Active Page: {active_page}",
+            f"- Problem Type: {problem_type}",
+            f"- Target Variable: {state.get('target')}",
+            f"- Dataset Shape: {dataset_stats.get('rows', 0)} rows, {column_count} columns",
+            # default=str: this text is only read by the model, so non-JSON
+            # values (e.g. numpy scalars) are stringified rather than failing.
+            f"- Model Metrics: {json.dumps(metrics, default=str)}",
+        ]
+        if dataset_text:
+            prompt_lines.append(f"- Dataset Summary:\n{dataset_text}")
+        prompt = "\n".join(prompt_lines)
+        # Fill the template by explicit keyword so a missing or extra key in
+        # the client's context can no longer raise KeyError.
+        system_prompt = SYSTEM_PROMPT.format(
+            dataset_stats=dataset_stats,
+            problem_type=problem_type,
+            metrics=metrics,
+            active_page=active_page,
+        )
+        # Call DeepSeek API
+        response = openai.ChatCompletion.create(
+            model="deepseek-chat",
+            messages=[{
+                "role": "system",
+                "content": system_prompt
+            }, {
+                "role": "user",
+                "content": prompt
+            }],
+            temperature=0.3,
+            max_tokens=500
+        )
+        answer = response.choices[0].message.content
+        return jsonify({"analysis": answer, "response": answer})
+    except Exception as e:
+        # Endpoint boundary: report any failure to the caller as a 500.
+        return jsonify({"error": str(e)}), 500
+# Streamlit app
+def run_streamlit_app():
+    """Render the Streamlit UI: sidebar navigation, the selected page, and the chatbot.
+
+    Session-state keys written here: ``df``, ``raw_data``, ``metrics``,
+    ``upload_key``, ``dataset_text``, ``cleaned_data``, ``data_versions``
+    (only trimmed by Undo) and ``chat_history``.
+    """
+    # Flask server URL
+    FLASK_URL = "http://localhost:5000/analyze"
+    # Seconds to wait for the Flask round trip before reporting an error
+    # instead of blocking the UI indefinitely.
+    REQUEST_TIMEOUT = 60
+    # Helper Functions
+    def enhance_section_title(title):
+        """Render *title* as an underlined <h2> heading."""
+        st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{title}</h2>", unsafe_allow_html=True)
+    def convert_csv_to_json_and_text(df):
+        """Return a plain-text summary of *df*: shape, missing count, per-column stats.
+
+        The name is kept for compatibility; the summary is built directly from
+        the DataFrame, without serializing the whole dataset to JSON first.
+        """
+        lines = [
+            f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns",
+            f"Missing Values: {df.isna().sum().sum()}",
+            "Columns:",
+        ]
+        for col in df.columns:
+            series = df[col]
+            if pd.api.types.is_numeric_dtype(series):
+                detail = f"Mean={series.mean():.2f}, Min={series.min()}, Max={series.max()}"
+            else:
+                modes = series.mode()
+                top = modes[0] if not modes.empty else 'N/A'
+                detail = f"Unique={series.nunique()}, Top={top}"
+            lines.append(f"- {col} ({series.dtype}): {detail}, Missing={series.isna().sum()}")
+        return "\n".join(lines) + "\n"
+    def build_chat_context(app_mode):
+        """Serialize app state as the JSON string the /analyze endpoint reads as ``context``."""
+        data = st.session_state.get("cleaned_data")
+        if data is None:
+            data = st.session_state.get("raw_data")
+        dataset_stats = {}
+        if data is not None:
+            dataset_stats = {"rows": int(data.shape[0]), "columns": int(data.shape[1])}
+        metrics = st.session_state.get("metrics", {})
+        return json.dumps({
+            "current_state": {
+                "active_page": app_mode,
+                "dataset_stats": dataset_stats,
+                "model_metrics": metrics,
+                # Same value under "metrics": the system-prompt template is
+                # filled from this mapping and uses a {metrics} placeholder.
+                "metrics": metrics,
+                "problem_type": st.session_state.get("problem_type"),
+                "target": st.session_state.get("target"),
+            }
+        })
+    def get_chatbot_response(user_input, app_mode, dataset_text=""):
+        """Send the question to the Flask /analyze endpoint and return the reply text.
+
+        Returns an "Error: ..." string (never raises) when the server is
+        unreachable, answers with an HTTP error, or sends a non-JSON body.
+        """
+        prompt = user_input
+        if dataset_text:
+            prompt = f"{user_input}\n\nDataset summary:\n{dataset_text}"
+        payload = {
+            # Fields the /analyze endpoint reads
+            "prompt": prompt,
+            "context": build_chat_context(app_mode),
+            # Original fields, kept for backward compatibility
+            "user_input": user_input,
+            "app_mode": app_mode,
+            "dataset_text": dataset_text
+        }
+        try:
+            response = requests.post(FLASK_URL, json=payload, timeout=REQUEST_TIMEOUT)
+            response.raise_for_status()
+            body = response.json()
+        except requests.exceptions.RequestException as e:
+            return f"Error: Could not connect to Flask server. {str(e)}"
+        except ValueError as e:
+            return f"Error: Invalid response from Flask server. {str(e)}"
+        # The endpoint answers under "analysis"; "response" is accepted as well.
+        return body.get("analysis") or body.get("response") or "Error: No response from server"
+    # Sidebar Navigation
+    with st.sidebar:
+        st.title("🔮 Data-Vision Pro")
+        st.markdown("Your AI-powered data analysis suite.")
+        st.markdown("---")
+        app_mode = st.selectbox(
+            "Navigation",
+            ["Data Upload", "Data Cleaning", "EDA"],
+            format_func=lambda x: f"📌 {x}"
+        )
+        if app_mode == "Data Upload":
+            # The uploader below only accepts CSV, so only CSV is advertised.
+            st.info("⬆️ Upload your CSV dataset to begin.")
+        elif app_mode == "Data Cleaning":
+            st.info("🧹 Clean and preprocess your data using various tools.")
+        elif app_mode == "EDA":
+            st.info("🔍 Explore your data visually and statistically.")
+        st.markdown("---")
+        st.markdown("**Note**: Requires `ydata-profiling`, `requests`, `flask`. Install via `pip install ydata-profiling requests flask`.")
+        if 'cleaned_data' in st.session_state:
+            csv = st.session_state.cleaned_data.to_csv(index=False)
+            st.download_button(
+                label="Download Cleaned Data as CSV",
+                data=csv,
+                file_name='cleaned_data.csv',
+                mime='text/csv',
+            )
+        st.markdown("Created by Calvin Allen-Crawford")
+        st.markdown("v1.0 | © 2025")
+    # Main App Pages
+    if app_mode == "Data Upload":
+        st.title("📤 Data Upload & Analysis")
+        uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
+        if uploaded_file:
+            try:
+                df = pd.read_csv(uploaded_file)
+                st.session_state.df = df
+                st.session_state.metrics = {}
+                # Publish the data under the keys the other pages and the
+                # chatbot read, but only when a different file arrives, so a
+                # plain rerun does not discard cleaning progress.
+                upload_key = (uploaded_file.name, uploaded_file.size)
+                if st.session_state.get("upload_key") != upload_key:
+                    st.session_state.upload_key = upload_key
+                    st.session_state.raw_data = df
+                    st.session_state.pop("cleaned_data", None)
+                    st.session_state.pop("data_versions", None)
+                    st.session_state.dataset_text = convert_csv_to_json_and_text(df)
+                st.subheader("Dataset Health Check")
+                col1, col2, col3 = st.columns(3)
+                col1.metric("Total Samples", df.shape[0])
+                col2.metric("Features", df.shape[1])
+                col3.metric("Missing Values", df.isna().sum().sum())
+                if st.button("Generate Full Profile Report"):
+                    with st.spinner("Generating report..."):
+                        pr = ProfileReport(df, explorative=True)
+                        st_profile_report(pr)
+            except Exception as e:
+                # UI boundary: show the failure instead of a stack trace.
+                st.error(f"Error reading the file: {str(e)}")
+    elif app_mode == "Data Cleaning":
+        st.title("🧹 Smart Data Cleaning")
+        st.header("Preprocess and Transform Your Data")
+        if 'raw_data' not in st.session_state:
+            st.warning("Please upload data first in the Data Upload section.")
+            st.stop()
+        if 'cleaned_data' not in st.session_state:
+            st.session_state.cleaned_data = st.session_state.raw_data.copy()
+        df = st.session_state.cleaned_data.copy()
+        enhance_section_title("📊 Data Health Dashboard")
+        with st.expander("Explore Data Health Metrics", expanded=True):
+            col1, col2, col3 = st.columns(3)
+            with col1: st.metric("Columns", len(df.columns))
+            with col2: st.metric("Rows", len(df))
+            with col3: st.metric("Missing Values", df.isna().sum().sum())
+            if st.button("Generate Detailed Health Report"):
+                with st.spinner("Generating report..."):
+                    profile = ProfileReport(df, minimal=True)
+                    st_profile_report(profile)
+            # Undo is offered only when an earlier snapshot exists to fall back to.
+            if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
+                if st.button("Undo Last Action"):
+                    st.session_state.data_versions.pop()
+                    st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
+                    st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
+                    st.rerun()
+    elif app_mode == "EDA":
+        st.title("🔍 Interactive Data Explorer")
+        if 'cleaned_data' not in st.session_state:
+            st.warning("Please upload and clean data first.")
+            st.stop()
+        df = st.session_state.cleaned_data.copy()
+        enhance_section_title("Dataset Overview")
+        with st.container():
+            col1, col2, col3, col4 = st.columns(4)
+            col1.metric("Total Rows", df.shape[0])
+            col2.metric("Total Columns", df.shape[1])
+            missing_total = df.isna().sum().sum()
+            # An empty frame has size 0; report 0% rather than dividing by zero.
+            missing_percentage = missing_total / df.size * 100 if df.size else 0.0
+            col3.metric("Missing Values", f"{missing_total} ({missing_percentage:.1f}%)")
+            col4.metric("Duplicates", df.duplicated().sum())
+    # Chatbot Section
+    st.markdown("---")
+    st.subheader("💬 AI Chatbot Assistant")
+    st.info("Ask me about the app or your data! Try: 'What can I do here?' or 'What's in the dataset?'")
+    if "chat_history" not in st.session_state:
+        st.session_state.chat_history = []
+    for message in st.session_state.chat_history:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+    user_input = st.chat_input("Ask me anything about the app or your data...")
+    if user_input:
+        st.session_state.chat_history.append({"role": "user", "content": user_input})
+        with st.chat_message("user"):
+            st.markdown(user_input)
+        with st.spinner("Thinking..."):
+            dataset_text = st.session_state.get("dataset_text", "")
+            response = get_chatbot_response(user_input, app_mode, dataset_text)
+            st.session_state.chat_history.append({"role": "assistant", "content": response})
+        with st.chat_message("assistant"):
+            st.markdown(response)
+if __name__ == '__main__':
+    # Run Flask server in a separate thread
+    from threading import Thread
+    # Streamlit re-executes this script on every rerun, so an unguarded
+    # Thread(...).start() here would spawn a new server thread per interaction,
+    # each failing to bind the already-used port. st.cache_resource makes the
+    # start-up run once per process and reuses the thread afterwards.
+    @st.cache_resource
+    def start_flask_server():
+        """Start the Flask API in a background thread and return that thread."""
+        # NOTE(review): 0.0.0.0 exposes the unauthenticated /analyze endpoint on
+        # every interface, while the in-file client only calls localhost --
+        # consider binding to 127.0.0.1.
+        flask_thread = Thread(target=lambda: app.run(host='0.0.0.0', port=5000))
+        flask_thread.start()
+        return flask_thread
+    start_flask_server()
+    # Run Streamlit app
+    run_streamlit_app()