Spaces:

CosmickVisions
/

Neural-Vision

Sleeping

App Files Files Community

CosmickVisions committed on Mar 15

Commit

b4c02a4

verified ·

1 Parent(s): fcc0622

Update app.py

Browse files

Files changed (1) hide show

app.py +205 -239

app.py CHANGED Viewed

@@ -1,259 +1,225 @@
-# app_combined.py
 import streamlit as st
 import pandas as pd
-import numpy as np
 import plotly.express as px
-import plotly.graph_objects as go
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
 import requests
 import json
-from datetime import datetime
-import re
-import tempfile
-from scipy import stats
-from sklearn.impute import SimpleImputer
-from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
-from sklearn.decomposition import PCA
-import streamlit.components.v1 as components
-from io import StringIO
-from dotenv import load_dotenv
-from flask import Flask, request, jsonify
-from flask_cors import CORS
-import openai
 import os
-# Load environment variables
-load_dotenv()
-# Flask server setup
-app = Flask(__name__)
-CORS(app)
-# Configure DeepSeek API
-openai.api_key = os.getenv("DEEPSEEK_API_KEY")
-openai.api_base = "https://api.deepseek.com/v1"
-# System prompt for the AI assistant
-SYSTEM_PROMPT = '''
-You are Neural Analyst, an AI assistant for the Neural-Vision Enhanced analytics platform.
-Your capabilities include:
-1. Explaining model metrics and evaluation visualizations
-2. Interpreting dataset statistics and EDA reports
-3. Guiding users through app functionality
-4. Providing data science insights
-5. Comparing different model performances
-Always consider:
-- Current dataset statistics: {dataset_stats}
-- Active problem type: {problem_type}
-- Model metrics: {metrics}
-- App state: {active_page}
-'''
-@app.route('/analyze', methods=['POST'])
-def analyze():
     try:
-        data = request.json
-        context = json.loads(data['context'])
-        # Construct the prompt for DeepSeek
-        prompt = f'''
-        User Query: {data['prompt']}
-        Current Context:
-        - Active Page: {context['current_state']['active_page']}
-        - Problem Type: {context['current_state']['problem_type']}
-        - Target Variable: {context['current_state']['target']}
-        - Dataset Shape: {context['current_state']['dataset_stats'].get('rows', 0)} rows,
-          {context['current_state']['dataset_stats'].get('columns', 0)} columns
-        - Model Metrics: {json.dumps(context['current_state']['model_metrics'])}
-        '''
-        # Call DeepSeek API
-        response = openai.ChatCompletion.create(
-            model="deepseek-chat",
-            messages=[{
-                "role": "system",
-                "content": SYSTEM_PROMPT.format(**context['current_state'])
-            }, {
-                "role": "user",
-                "content": prompt
-            }],
-            temperature=0.3,
-            max_tokens=500
         )
-        return jsonify({"analysis": response.choices[0].message.content})
     except Exception as e:
-        return jsonify({"error": str(e)}), 500
-# Streamlit app
-def run_streamlit_app():
-    # Flask server URL
-    FLASK_URL = "http://localhost:5000/analyze"
-    # Helper Functions
-    def enhance_section_title(title):
-        st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{title}</h2>", unsafe_allow_html=True)
-    def convert_csv_to_json_and_text(df):
-        """Convert DataFrame to JSON and then to plain text."""
-        json_data = df.to_json(orient="records")
-        data_dict = json.loads(json_data)
-        text_summary = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
-        text_summary += f"Missing Values: {df.isna().sum().sum()}\n"
-        text_summary += "Columns:\n"
-        for col in df.columns:
-            text_summary += f"- {col} ({df[col].dtype}): "
-            if pd.api.types.is_numeric_dtype(df[col]):
-                text_summary += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
-            else:
-                text_summary += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
-            text_summary += f", Missing={df[col].isna().sum()}\n"
-        return text_summary
-    def get_chatbot_response(user_input, app_mode, dataset_text=""):
-        """Send request to Flask server for chatbot response."""
-        payload = {
-            "user_input": user_input,
-            "app_mode": app_mode,
-            "dataset_text": dataset_text
-        }
-        try:
-            response = requests.post(FLASK_URL, json=payload)
-            response.raise_for_status()
-            return response.json().get("response", "Error: No response from server")
-        except requests.exceptions.RequestException as e:
-            return f"Error: Could not connect to Flask server. {str(e)}"
-    # Sidebar Navigation
-    with st.sidebar:
-        st.title("🔮 Data-Vision Pro")
-        st.markdown("Your AI-powered data analysis suite.")
-        st.markdown("---")
-        app_mode = st.selectbox(
-            "Navigation",
-            ["Data Upload", "Data Cleaning", "EDA"],
-            format_func=lambda x: f"📌 {x}"
-        )
-        if app_mode == "Data Upload":
-            st.info("⬆️ Upload your CSV or XLSX dataset to begin.")
-        elif app_mode == "Data Cleaning":
-            st.info("🧹 Clean and preprocess your data using various tools.")
-        elif app_mode == "EDA":
-            st.info("🔍 Explore your data visually and statistically.")
-        st.markdown("---")
-        st.markdown("**Note**: Requires `ydata-profiling`, `requests`, `flask`. Install via `pip install ydata-profiling requests flask`.")
-        if 'cleaned_data' in st.session_state:
-            csv = st.session_state.cleaned_data.to_csv(index=False)
-            st.download_button(
-                label="Download Cleaned Data as CSV",
-                data=csv,
-                file_name='cleaned_data.csv',
-                mime='text/csv',
-            )
-        st.markdown("Created by Calvin Allen-Crawford")
-        st.markdown("v1.0 | © 2025")
-    # Main App Pages
-    if app_mode == "Data Upload":
-        st.title("📤 Data Upload & Analysis")
-        uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
-        if uploaded_file:
-            try:
-                df = pd.read_csv(uploaded_file)
-                st.session_state.df = df
-                st.session_state.metrics = {}
-                st.subheader("Dataset Health Check")
-                col1, col2, col3 = st.columns(3)
-                col1.metric("Total Samples", df.shape[0])
-                col2.metric("Features", df.shape[1])
-                col3.metric("Missing Values", df.isna().sum().sum())
-                if st.button("Generate Full Profile Report"):
-                    with st.spinner("Generating report..."):
-                        pr = ProfileReport(df, explorative=True)
-                        st_profile_report(pr)
-            except Exception as e:
-                st.error(f"Error reading the file: {str(e)}")
-    elif app_mode == "Data Cleaning":
-        st.title("🧹 Smart Data Cleaning")
-        st.header("Preprocess and Transform Your Data")
-        if 'raw_data' not in st.session_state:
-            st.warning("Please upload data first in the Data Upload section.")
-            st.stop()
-        if 'cleaned_data' not in st.session_state:
-            st.session_state.cleaned_data = st.session_state.raw_data.copy()
-        df = st.session_state.cleaned_data.copy()
-        enhance_section_title("📊 Data Health Dashboard")
-        with st.expander("Explore Data Health Metrics", expanded=True):
-            col1, col2, col3 = st.columns(3)
-            with col1: st.metric("Columns", len(df.columns))
-            with col2: st.metric("Rows", len(df))
-            with col3: st.metric("Missing Values", df.isna().sum().sum())
-            if st.button("Generate Detailed Health Report"):
-                with st.spinner("Generating report..."):
-                    profile = ProfileReport(df, minimal=True)
-                    st_profile_report(profile)
-            if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
-                if st.button("Undo Last Action"):
-                    st.session_state.data_versions.pop()
-                    st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
-                    st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
-                    st.rerun()
-    elif app_mode == "EDA":
-        st.title("🔍 Interactive Data Explorer")
-        if 'cleaned_data' not in st.session_state:
-            st.warning("Please upload and clean data first.")
-            st.stop()
-        df = st.session_state.cleaned_data.copy()
-        enhance_section_title("Dataset Overview")
-        with st.container():
-            col1, col2, col3, col4 = st.columns(4)
-            col1.metric("Total Rows", df.shape[0])
-            col2.metric("Total Columns", df.shape[1])
-            missing_percentage = df.isna().sum().sum() / df.size * 100
-            col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
-            col4.metric("Duplicates", df.duplicated().sum())
-    # Chatbot Section
     st.markdown("---")
-    st.subheader("💬 AI Chatbot Assistant")
-    st.info("Ask me about the app or your data! Try: 'What can I do here?' or 'What's in the dataset?'")
-    if "chat_history" not in st.session_state:
-        st.session_state.chat_history = []
-    for message in st.session_state.chat_history:
-        with st.chat_message(message["role"]):
-            st.markdown(message["content"])
-    user_input = st.chat_input("Ask me anything about the app or your data...")
-    if user_input:
-        st.session_state.chat_history.append({"role": "user", "content": user_input})
-        with st.chat_message("user"):
-            st.markdown(user_input)
-        with st.spinner("Thinking..."):
-            dataset_text = st.session_state.get("dataset_text", "")
-            response = get_chatbot_response(user_input, app_mode, dataset_text)
-            st.session_state.chat_history.append({"role": "assistant", "content": response})
-        with st.chat_message("assistant"):
-            st.markdown(response)
-if __name__ == '__main__':
-    # Run Flask server in a separate thread
-    from threading import Thread
-    flask_thread = Thread(target=lambda: app.run(host='0.0.0.0', port=5000))
-    flask_thread.start()
-    # Run Streamlit app
-    run_streamlit_app()

 import streamlit as st
 import pandas as pd
 import plotly.express as px
+import numpy as np
+from pycaret.classification import *
+from pycaret.regression import *
+from pycaret.clustering import *
 from ydata_profiling import ProfileReport
 from streamlit_pandas_profiling import st_profile_report
+import mlflow
 import requests
 import json
 import os
+# Set page config
+st.set_page_config(page_title="Neural-Vision Enhanced", layout="wide")
+# MLflow Tracking
+mlflow.set_tracking_uri("http://127.0.0.1:5000")
+mlflow.set_experiment("Neural-Vision Enhanced")
+# Initialize session state
+st.session_state.setdefault('metrics', {})
+st.session_state.setdefault('chat_history', [])
+# Enhanced Visualization Functions
+def visualize_model(model, plots):
+    cols = st.columns(len(plots))
+    for col, plot in zip(cols, plots):
+        with col:
+            plot_model(model, plot=plot, display_format='streamlit')
+def visualize_classification():
+    visualize_model(st.session_state.best_model, ['confusion_matrix', 'auc', 'feature', 'pr'])
+def visualize_regression():
+    visualize_model(st.session_state.best_model, ['residuals', 'error', 'cooks', 'learning'])
+def visualize_clustering():
+    visualize_model(st.session_state.best_model, ['cluster', 'distribution', 'elbow', 'silhouette'])
+# Enhanced Context Generator
+def get_app_context():
+    df_stats = {}
+    if 'df' in st.session_state:
+        df = st.session_state.df
+        df_stats = {
+            "rows": df.shape[0],
+            "columns": df.shape[1],
+            "missing_values": df.isna().sum().sum(),
+            "columns": {col: str(df[col].dtype) for col in df.columns}
+        }
+    context = {
+        "current_state": {
+            "active_page": st.session_state.get('active_page', 'Data Upload'),
+            "dataset_stats": df_stats,
+            "model_metrics": st.session_state.metrics,
+            "problem_type": st.session_state.get('problem_type'),
+            "target": st.session_state.get('target'),
+            "best_model": str(st.session_state.get('best_model', None))
+        },
+        "app_capabilities": [
+            "CSV data upload and statistical analysis",
+            "Automated EDA report generation",
+            "PyCaret-powered model training for classification, regression, and clustering",
+            "Advanced model evaluation visualizations",
+            "ML experiment tracking with MLflow",
+            "AI-powered analysis through DeepSeek integration"
+        ]
+    }
+    return json.dumps(context)
+# Chatbot Handler
+def handle_ai_query(prompt):
     try:
+        response = requests.post(
+            "http://127.0.0.1:5001/analyze",
+            json={
+                "prompt": prompt,
+                "context": get_app_context(),
+                "metrics": st.session_state.metrics
+            }
         )
+        return response.json().get("analysis", "Error in analysis")
     except Exception as e:
+        return f"Analysis error: {str(e)}"
+# Main App Components
+def data_upload_page():
+    st.title("📤 Data Upload & Analysis")
+    uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
+    if uploaded_file:
+        df = pd.read_csv(uploaded_file)
+        st.session_state.df = df
+        st.session_state.metrics = {}
+        st.subheader("Dataset Health Check")
+        col1, col2, col3 = st.columns(3)
+        col1.metric("Total Samples", df.shape[0])
+        col2.metric("Features", df.shape[1])
+        col3.metric("Missing Values", df.isna().sum().sum())
+        if st.button("Generate Full EDA Report"):
+            with st.spinner("Generating comprehensive analysis..."):
+                profile = ProfileReport(df, explorative=True)
+                st_profile_report(profile)
+def model_training_page():
+    st.title("🧠 Model Training Studio")
+    if 'df' not in st.session_state:
+        st.warning("Upload data first!")
+        return
+    df = st.session_state.df
+    problem_type = st.selectbox("Select Problem Type", ["Classification", "Regression", "Clustering"])
+    if problem_type != "Clustering":
+        st.session_state.target = st.selectbox("Select Target Variable", df.columns)
+    if st.button("Initialize Training Environment"):
+        with st.spinner("Configuring PyCaret..."):
+            setup_func = {
+                "Classification": classification_setup,
+                "Regression": regression_setup,
+                "Clustering": clustering_setup
+            }[problem_type]
+            setup_func(df, target=st.session_state.get('target'), session_id=42)
+            st.session_state.problem_type = problem_type
+            st.success("Environment ready for modeling!")
+    if 'problem_type' in st.session_state:
+        st.subheader("Model Training Dashboard")
+        if st.session_state.problem_type in ["Classification", "Regression"]:
+            compare_models = st.checkbox("Compare Multiple Models", True)
+            n_models = st.slider("Number of Models", 1, 15, 5) if compare_models else 1
+            if st.button("Start Training"):
+                with st.spinner("Training in progress..."):
+                    if compare_models:
+                        models = compare_models(n_select=n_models)
+                        st.session_state.best_model = models[0]
+                    else:
+                        st.session_state.best_model = create_model()
+                    # Capture metrics
+                    results = pull()
+                    st.session_state.metrics = results.to_dict()
+                    st.success(f"Best Model: {st.session_state.best_model}")
+                    # Log to MLflow
+                    with mlflow.start_run():
+                        mlflow.log_metrics(results.iloc[0].to_dict())
+                        mlflow.sklearn.log_model(st.session_state.best_model, "model")
+def visualization_page():
+    st.title("🔍 Model Evaluation Center")
+    if 'best_model' not in st.session_state:
+        st.warning("Train a model first!")
+        return
+    st.subheader("Performance Analysis")
+    visualizers = {
+        "Classification": visualize_classification,
+        "Regression": visualize_regression,
+        "Clustering": visualize_clustering
+    }
+    visualizers[st.session_state.problem_type]()
+    st.subheader("Metric Analysis")
+    st.dataframe(pd.DataFrame.from_dict(st.session_state.metrics))
+    if st.button("Request AI Analysis"):
+        analysis = handle_ai_query("Analyze these model metrics")
+        st.markdown(f"**AI Analysis:**\n\n{analysis}")
+# Chatbot Interface
+def ai_assistant():
     st.markdown("---")
+    st.subheader("🧠 Neural Insight Assistant")
+    for msg in st.session_state.chat_history:
+        st.chat_message(msg["role"]).write(msg["content"])
+    if prompt := st.chat_input("Ask about models, data, or app usage"):
+        st.session_state.chat_history.append({"role": "user", "content": prompt})
+        st.chat_message("user").write(prompt)
+        response = handle_ai_query(prompt)
+        st.session_state.chat_history.append({"role": "assistant", "content": response})
+        st.chat_message("assistant").write(response)
+# App Layout
+with st.sidebar:
+    st.title("🔮 Neural-Vision Enhanced")
+    page = st.selectbox("Navigation", [
+        "Data Upload & Analysis",
+        "Model Training Studio",
+        "Model Evaluation Center"
+    ])
+    st.session_state.active_page = page
+    st.markdown("---")
+    st.markdown("**DeepSeek API Key**")
+    os.environ["DEEPSEEK_API_KEY"] = st.text_input(
+        "Enter API Key:", type="password",
+        help="Required for AI analysis features"
+    )
+    st.markdown("---")
+    st.markdown("v4.0 | © 2025 Neural-Vision")
+# Page Routing
+if "Data Upload & Analysis" in page:
+    data_upload_page()
+elif "Model Training Studio" in page:
+    model_training_page()
+else:
+    visualization_page()
+ai_assistant()