CosmickVisions committed on
Commit b4c02a4 · verified · 1 Parent(s): fcc0622

Update app.py

Files changed (1):
  app.py  +205 −239
app.py CHANGED
@@ -1,259 +1,225 @@
- # app_combined.py
  import streamlit as st
  import pandas as pd
- import numpy as np
  import plotly.express as px
- import plotly.graph_objects as go
  from ydata_profiling import ProfileReport
  from streamlit_pandas_profiling import st_profile_report
  import requests
  import json
- from datetime import datetime
- import re
- import tempfile
- from scipy import stats
- from sklearn.impute import SimpleImputer
- from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
- from sklearn.decomposition import PCA
- import streamlit.components.v1 as components
- from io import StringIO
- from dotenv import load_dotenv
- from flask import Flask, request, jsonify
- from flask_cors import CORS
- import openai
  import os

- # Load environment variables
- load_dotenv()
-
- # Flask server setup
- app = Flask(__name__)
- CORS(app)
-
- # Configure DeepSeek API
- openai.api_key = os.getenv("DEEPSEEK_API_KEY")
- openai.api_base = "https://api.deepseek.com/v1"
-
- # System prompt for the AI assistant
- SYSTEM_PROMPT = '''
- You are Neural Analyst, an AI assistant for the Neural-Vision Enhanced analytics platform.
- Your capabilities include:
- 1. Explaining model metrics and evaluation visualizations
- 2. Interpreting dataset statistics and EDA reports
- 3. Guiding users through app functionality
- 4. Providing data science insights
- 5. Comparing different model performances
- Always consider:
- - Current dataset statistics: {dataset_stats}
- - Active problem type: {problem_type}
- - Model metrics: {metrics}
- - App state: {active_page}
- '''

- @app.route('/analyze', methods=['POST'])
- def analyze():
      try:
-         data = request.json
-         context = json.loads(data['context'])
-
-         # Construct the prompt for DeepSeek
-         prompt = f'''
-         User Query: {data['prompt']}
-
-         Current Context:
-         - Active Page: {context['current_state']['active_page']}
-         - Problem Type: {context['current_state']['problem_type']}
-         - Target Variable: {context['current_state']['target']}
-         - Dataset Shape: {context['current_state']['dataset_stats'].get('rows', 0)} rows,
-           {context['current_state']['dataset_stats'].get('columns', 0)} columns
-         - Model Metrics: {json.dumps(context['current_state']['model_metrics'])}
-         '''
-
-         # Call DeepSeek API
-         response = openai.ChatCompletion.create(
-             model="deepseek-chat",
-             messages=[{
-                 "role": "system",
-                 "content": SYSTEM_PROMPT.format(**context['current_state'])
-             }, {
-                 "role": "user",
-                 "content": prompt
-             }],
-             temperature=0.3,
-             max_tokens=500
          )
-
-         return jsonify({"analysis": response.choices[0].message.content})
-
      except Exception as e:
-         return jsonify({"error": str(e)}), 500
-
- # Streamlit app
- def run_streamlit_app():
-     # Flask server URL
-     FLASK_URL = "http://localhost:5000/analyze"
-
-     # Helper Functions
-     def enhance_section_title(title):
-         st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{title}</h2>", unsafe_allow_html=True)

-     def convert_csv_to_json_and_text(df):
-         """Convert DataFrame to JSON and then to plain text."""
-         json_data = df.to_json(orient="records")
-         data_dict = json.loads(json_data)
-
-         text_summary = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
-         text_summary += f"Missing Values: {df.isna().sum().sum()}\n"
-         text_summary += "Columns:\n"
-         for col in df.columns:
-             text_summary += f"- {col} ({df[col].dtype}): "
-             if pd.api.types.is_numeric_dtype(df[col]):
-                 text_summary += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
-             else:
-                 text_summary += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
-             text_summary += f", Missing={df[col].isna().sum()}\n"
-         return text_summary
-
-     def get_chatbot_response(user_input, app_mode, dataset_text=""):
-         """Send request to Flask server for chatbot response."""
-         payload = {
-             "user_input": user_input,
-             "app_mode": app_mode,
-             "dataset_text": dataset_text
-         }
-         try:
-             response = requests.post(FLASK_URL, json=payload)
-             response.raise_for_status()
-             return response.json().get("response", "Error: No response from server")
-         except requests.exceptions.RequestException as e:
-             return f"Error: Could not connect to Flask server. {str(e)}"
-
-     # Sidebar Navigation
-     with st.sidebar:
-         st.title("🔮 Data-Vision Pro")
-         st.markdown("Your AI-powered data analysis suite.")
-         st.markdown("---")
-         app_mode = st.selectbox(
-             "Navigation",
-             ["Data Upload", "Data Cleaning", "EDA"],
-             format_func=lambda x: f"📌 {x}"
-         )
-         if app_mode == "Data Upload":
-             st.info("⬆️ Upload your CSV or XLSX dataset to begin.")
-         elif app_mode == "Data Cleaning":
-             st.info("🧹 Clean and preprocess your data using various tools.")
-         elif app_mode == "EDA":
-             st.info("🔍 Explore your data visually and statistically.")
-
-         st.markdown("---")
-         st.markdown("**Note**: Requires `ydata-profiling`, `requests`, `flask`. Install via `pip install ydata-profiling requests flask`.")
-         if 'cleaned_data' in st.session_state:
-             csv = st.session_state.cleaned_data.to_csv(index=False)
-             st.download_button(
-                 label="Download Cleaned Data as CSV",
-                 data=csv,
-                 file_name='cleaned_data.csv',
-                 mime='text/csv',
-             )
-         st.markdown("Created by Calvin Allen-Crawford")
-         st.markdown("v1.0 | © 2025")
-
-     # Main App Pages
-     if app_mode == "Data Upload":
-         st.title("📤 Data Upload & Analysis")
-         uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])

-         if uploaded_file:
-             try:
-                 df = pd.read_csv(uploaded_file)
-                 st.session_state.df = df
-                 st.session_state.metrics = {}
-
-                 st.subheader("Dataset Health Check")
-                 col1, col2, col3 = st.columns(3)
-                 col1.metric("Total Samples", df.shape[0])
-                 col2.metric("Features", df.shape[1])
-                 col3.metric("Missing Values", df.isna().sum().sum())
-
-                 if st.button("Generate Full Profile Report"):
-                     with st.spinner("Generating report..."):
-                         pr = ProfileReport(df, explorative=True)
-                         st_profile_report(pr)
-             except Exception as e:
-                 st.error(f"Error reading the file: {str(e)}")
-
-     elif app_mode == "Data Cleaning":
-         st.title("🧹 Smart Data Cleaning")
-         st.header("Preprocess and Transform Your Data")
-         if 'raw_data' not in st.session_state:
-             st.warning("Please upload data first in the Data Upload section.")
-             st.stop()
-         if 'cleaned_data' not in st.session_state:
-             st.session_state.cleaned_data = st.session_state.raw_data.copy()
-         df = st.session_state.cleaned_data.copy()
-
-         enhance_section_title("📊 Data Health Dashboard")
-         with st.expander("Explore Data Health Metrics", expanded=True):
-             col1, col2, col3 = st.columns(3)
-             with col1: st.metric("Columns", len(df.columns))
-             with col2: st.metric("Rows", len(df))
-             with col3: st.metric("Missing Values", df.isna().sum().sum())
-         if st.button("Generate Detailed Health Report"):
-             with st.spinner("Generating report..."):
-                 profile = ProfileReport(df, minimal=True)
-                 st_profile_report(profile)
-         if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
-             if st.button("Undo Last Action"):
-                 st.session_state.data_versions.pop()
-                 st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
-                 st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
-                 st.rerun()
-
-     elif app_mode == "EDA":
-         st.title("🔍 Interactive Data Explorer")
-         if 'cleaned_data' not in st.session_state:
-             st.warning("Please upload and clean data first.")
-             st.stop()
-         df = st.session_state.cleaned_data.copy()

-         enhance_section_title("Dataset Overview")
-         with st.container():
-             col1, col2, col3, col4 = st.columns(4)
-             col1.metric("Total Rows", df.shape[0])
-             col2.metric("Total Columns", df.shape[1])
-             missing_percentage = df.isna().sum().sum() / df.size * 100
-             col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
-             col4.metric("Duplicates", df.duplicated().sum())

-     # Chatbot Section
      st.markdown("---")
-     st.subheader("💬 AI Chatbot Assistant")
-     st.info("Ask me about the app or your data! Try: 'What can I do here?' or 'What's in the dataset?'")
-     if "chat_history" not in st.session_state:
-         st.session_state.chat_history = []
-
-     for message in st.session_state.chat_history:
-         with st.chat_message(message["role"]):
-             st.markdown(message["content"])
-
-     user_input = st.chat_input("Ask me anything about the app or your data...")
-     if user_input:
-         st.session_state.chat_history.append({"role": "user", "content": user_input})
-         with st.chat_message("user"):
-             st.markdown(user_input)

-         with st.spinner("Thinking..."):
-             dataset_text = st.session_state.get("dataset_text", "")
-             response = get_chatbot_response(user_input, app_mode, dataset_text)
-             st.session_state.chat_history.append({"role": "assistant", "content": response})
-             with st.chat_message("assistant"):
-                 st.markdown(response)

- if __name__ == '__main__':
-     # Run Flask server in a separate thread
-     from threading import Thread
-     flask_thread = Thread(target=lambda: app.run(host='0.0.0.0', port=5000))
-     flask_thread.start()
-
-     # Run Streamlit app
-     run_streamlit_app()

  import streamlit as st
  import pandas as pd
  import plotly.express as px
+ import numpy as np
+ # Aliased PyCaret imports: star-importing all three task modules would let each
+ # module's setup/compare_models/plot_model shadow the one imported before it.
+ from pycaret import classification as pyc_cls
+ from pycaret import regression as pyc_reg
+ from pycaret import clustering as pyc_clu
  from ydata_profiling import ProfileReport
  from streamlit_pandas_profiling import st_profile_report
+ import mlflow
  import requests
  import json
  import os

+ # Set page config
+ st.set_page_config(page_title="Neural-Vision Enhanced", layout="wide")
+
+ # MLflow Tracking (expects a tracking server already listening at this URI)
+ mlflow.set_tracking_uri("http://127.0.0.1:5000")
+ mlflow.set_experiment("Neural-Vision Enhanced")
+
+ # Initialize session state
+ st.session_state.setdefault('metrics', {})
+ st.session_state.setdefault('chat_history', [])
+
+ # Map each problem type to its PyCaret module
+ PYCARET_MODULES = {
+     "Classification": pyc_cls,
+     "Regression": pyc_reg,
+     "Clustering": pyc_clu,
+ }
+
+ # Enhanced Visualization Functions
+ def visualize_model(model, plots):
+     # Dispatch to the PyCaret module matching the trained problem type
+     mod = PYCARET_MODULES[st.session_state.problem_type]
+     cols = st.columns(len(plots))
+     for col, plot in zip(cols, plots):
+         with col:
+             mod.plot_model(model, plot=plot, display_format='streamlit')
+
+ def visualize_classification():
+     visualize_model(st.session_state.best_model, ['confusion_matrix', 'auc', 'feature', 'pr'])
+
+ def visualize_regression():
+     visualize_model(st.session_state.best_model, ['residuals', 'error', 'cooks', 'learning'])
+
+ def visualize_clustering():
+     visualize_model(st.session_state.best_model, ['cluster', 'distribution', 'elbow', 'silhouette'])
+
+ # Enhanced Context Generator
+ def get_app_context():
+     df_stats = {}
+     if 'df' in st.session_state:
+         df = st.session_state.df
+         df_stats = {
+             "rows": df.shape[0],
+             "columns": df.shape[1],
+             "missing_values": int(df.isna().sum().sum()),  # cast numpy int for json.dumps
+             "column_dtypes": {col: str(df[col].dtype) for col in df.columns}  # renamed: a second "columns" key would overwrite the count above
+         }
+
+     context = {
+         "current_state": {
+             "active_page": st.session_state.get('active_page', 'Data Upload'),
+             "dataset_stats": df_stats,
+             "model_metrics": st.session_state.metrics,
+             "problem_type": st.session_state.get('problem_type'),
+             "target": st.session_state.get('target'),
+             "best_model": str(st.session_state.get('best_model', None))
+         },
+         "app_capabilities": [
+             "CSV data upload and statistical analysis",
+             "Automated EDA report generation",
+             "PyCaret-powered model training for classification, regression, and clustering",
+             "Advanced model evaluation visualizations",
+             "ML experiment tracking with MLflow",
+             "AI-powered analysis through DeepSeek integration"
+         ]
+     }
+
+     return json.dumps(context)

+ # Chatbot Handler: relies on a companion analysis service on port 5001
+ # (the Flask endpoint removed above, or an equivalent, must run separately)
+ def handle_ai_query(prompt):
      try:
+         response = requests.post(
+             "http://127.0.0.1:5001/analyze",
+             json={
+                 "prompt": prompt,
+                 "context": get_app_context(),
+                 "metrics": st.session_state.metrics
+             }
          )
+         return response.json().get("analysis", "Error in analysis")
      except Exception as e:
+         return f"Analysis error: {str(e)}"

+ # Main App Components
+ def data_upload_page():
+     st.title("📤 Data Upload & Analysis")
+     uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
+
+     if uploaded_file:
+         df = pd.read_csv(uploaded_file)
+         st.session_state.df = df
+         st.session_state.metrics = {}

+         st.subheader("Dataset Health Check")
+         col1, col2, col3 = st.columns(3)
+         col1.metric("Total Samples", df.shape[0])
+         col2.metric("Features", df.shape[1])
+         col3.metric("Missing Values", df.isna().sum().sum())
+
+         if st.button("Generate Full EDA Report"):
+             with st.spinner("Generating comprehensive analysis..."):
+                 profile = ProfileReport(df, explorative=True)
+                 st_profile_report(profile)

+ def model_training_page():
+     st.title("🧠 Model Training Studio")
+
+     if 'df' not in st.session_state:
+         st.warning("Upload data first!")
+         return
+
+     df = st.session_state.df
+     problem_type = st.selectbox("Select Problem Type", ["Classification", "Regression", "Clustering"])
+
+     if problem_type != "Clustering":
+         st.session_state.target = st.selectbox("Select Target Variable", df.columns)
+
+     if st.button("Initialize Training Environment"):
+         with st.spinner("Configuring PyCaret..."):
+             mod = PYCARET_MODULES[problem_type]
+             if problem_type == "Clustering":
+                 mod.setup(df, session_id=42)  # clustering setup takes no target
+             else:
+                 mod.setup(df, target=st.session_state.target, session_id=42)
+             st.session_state.problem_type = problem_type
+             st.success("Environment ready for modeling!")
+
+     if 'problem_type' in st.session_state:
+         st.subheader("Model Training Dashboard")
+         if st.session_state.problem_type in ["Classification", "Regression"]:
+             mod = PYCARET_MODULES[st.session_state.problem_type]
+             do_compare = st.checkbox("Compare Multiple Models", True)  # renamed so it no longer shadows PyCaret's compare_models
+             n_models = st.slider("Number of Models", 1, 15, 5) if do_compare else 1
+
+             if st.button("Start Training"):
+                 with st.spinner("Training in progress..."):
+                     if do_compare:
+                         models = mod.compare_models(n_select=n_models)
+                         # n_select=1 returns a single estimator rather than a list
+                         st.session_state.best_model = models[0] if isinstance(models, list) else models
+                     else:
+                         st.session_state.best_model = mod.create_model('lr')  # create_model needs an estimator id; 'lr' as a default
+
+                     # Capture metrics
+                     results = mod.pull()
+                     st.session_state.metrics = results.to_dict()
+                     st.success(f"Best Model: {st.session_state.best_model}")
+
+                     # Log to MLflow (numeric metrics only; the leaderboard also holds strings)
+                     with mlflow.start_run():
+                         mlflow.log_metrics({k: v for k, v in results.iloc[0].items() if isinstance(v, (int, float))})
+                         mlflow.sklearn.log_model(st.session_state.best_model, "model")
+
+ def visualization_page():
+     st.title("🔍 Model Evaluation Center")
+
+     if 'best_model' not in st.session_state:
+         st.warning("Train a model first!")
+         return
+
+     st.subheader("Performance Analysis")
+
+     visualizers = {
+         "Classification": visualize_classification,
+         "Regression": visualize_regression,
+         "Clustering": visualize_clustering
+     }
+     visualizers[st.session_state.problem_type]()
+
+     st.subheader("Metric Analysis")
+     st.dataframe(pd.DataFrame.from_dict(st.session_state.metrics))
+
+     if st.button("Request AI Analysis"):
+         analysis = handle_ai_query("Analyze these model metrics")
+         st.markdown(f"**AI Analysis:**\n\n{analysis}")

+ # Chatbot Interface
+ def ai_assistant():
      st.markdown("---")
+     st.subheader("🧠 Neural Insight Assistant")
+
+     for msg in st.session_state.chat_history:
+         st.chat_message(msg["role"]).write(msg["content"])
+
+     if prompt := st.chat_input("Ask about models, data, or app usage"):
+         st.session_state.chat_history.append({"role": "user", "content": prompt})
+         st.chat_message("user").write(prompt)
+
+         response = handle_ai_query(prompt)
+
+         st.session_state.chat_history.append({"role": "assistant", "content": response})
+         st.chat_message("assistant").write(response)
+
+ # App Layout
+ with st.sidebar:
+     st.title("🔮 Neural-Vision Enhanced")
+     page = st.selectbox("Navigation", [
+         "Data Upload & Analysis",
+         "Model Training Studio",
+         "Model Evaluation Center"
+     ])
+     st.session_state.active_page = page
+     st.markdown("---")
+     st.markdown("**DeepSeek API Key**")
+     os.environ["DEEPSEEK_API_KEY"] = st.text_input(
+         "Enter API Key:", type="password",
+         help="Required for AI analysis features"
+     )
+     st.markdown("---")
+     st.markdown("v4.0 | © 2025 Neural-Vision")

+ # Page Routing
+ if "Data Upload & Analysis" in page:
+     data_upload_page()
+ elif "Model Training Studio" in page:
+     model_training_page()
+ else:
+     visualization_page()

+ ai_assistant()
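
This commit removes the in-process Flask server, but handle_ai_query still posts to http://127.0.0.1:5001/analyze, so a companion service has to expose that route. A minimal sketch, assuming it keeps the deleted endpoint's behavior (the deepseek-chat model, api_base, and {"analysis": ...} response shape come from the removed code; the 5001 port and payload keys from the new handle_ai_query; the file name analysis_server.py is hypothetical):

# analysis_server.py - hypothetical companion service for handle_ai_query
import os
import json
import openai  # pre-1.0 openai client, as in the removed code
from flask import Flask, request, jsonify

app = Flask(__name__)
openai.api_key = os.getenv("DEEPSEEK_API_KEY")
openai.api_base = "https://api.deepseek.com/v1"

@app.route('/analyze', methods=['POST'])
def analyze():
    try:
        data = request.json
        context = json.loads(data['context'])  # JSON string produced by get_app_context()
        response = openai.ChatCompletion.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": f"You are Neural Analyst. App state: {context['current_state']}"},
                {"role": "user", "content": data['prompt']},
            ],
            temperature=0.3,
            max_tokens=500,
        )
        # handle_ai_query reads the "analysis" key from this JSON body
        return jsonify({"analysis": response.choices[0].message.content})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

if __name__ == '__main__':
    app.run(host='127.0.0.1', port=5001)

The Streamlit client treats any response without an "analysis" key as a failure, so the 500 path above degrades to the "Error in analysis" message in the UI.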
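
Likewise, mlflow.set_tracking_uri("http://127.0.0.1:5000") assumes a tracking server is already listening on port 5000; nothing in this commit starts one. One way to provide it with the standard MLflow CLI before launching the app:

mlflow server --host 127.0.0.1 --port 5000
streamlit run app.py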
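
The rewrite also drops the old sidebar note listing required packages. Judging from the new import list alone, the app now needs pycaret and mlflow on top of the earlier dependencies; a plausible install line (PyPI package names, not pinned or tested against specific versions):

pip install streamlit pandas plotly pycaret mlflow ydata-profiling streamlit-pandas-profiling requests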