CosmickVisions committed on
Commit
5842911
·
verified ·
1 Parent(s): 5b73495

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +238 -231
app.py CHANGED
@@ -1,252 +1,259 @@
 
1
  import streamlit as st
2
  import pandas as pd
3
- import plotly.express as px
4
  import numpy as np
5
- from pycaret.classification import *
6
- from pycaret.regression import *
7
- from pycaret.clustering import *
8
  from ydata_profiling import ProfileReport
9
  from streamlit_pandas_profiling import st_profile_report
10
- import mlflow
11
  import requests
12
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  import os
14
 
15
- # Set page config
16
- st.set_page_config(page_title="Neural-Vision Enhanced", layout="wide")
17
-
18
- # MLflow Tracking
19
- mlflow.set_tracking_uri("http://127.0.0.1:5000")
20
- mlflow.set_experiment("Neural-Vision Enhanced")
21
-
22
- # Initialize session state
23
- if 'metrics' not in st.session_state:
24
- st.session_state.metrics = {}
25
- if 'chat_history' not in st.session_state:
26
- st.session_state.chat_history = []
27
-
28
- # Enhanced Visualization Functions
29
- def visualize_classification():
30
- col1, col2 = st.columns(2)
31
- with col1:
32
- plot_model(st.session_state.best_model, plot='confusion_matrix', display_format='streamlit')
33
- with col2:
34
- plot_model(st.session_state.best_model, plot='auc', display_format='streamlit')
35
-
36
- col3, col4 = st.columns(2)
37
- with col3:
38
- plot_model(st.session_state.best_model, plot='feature', display_format='streamlit')
39
- with col4:
40
- plot_model(st.session_state.best_model, plot='pr', display_format='streamlit')
41
-
42
- def visualize_regression():
43
- col1, col2 = st.columns(2)
44
- with col1:
45
- plot_model(st.session_state.best_model, plot='residuals', display_format='streamlit')
46
- with col2:
47
- plot_model(st.session_state.best_model, plot='error', display_format='streamlit')
48
-
49
- col3, col4 = st.columns(2)
50
- with col3:
51
- plot_model(st.session_state.best_model, plot='cooks', display_format='streamlit')
52
- with col4:
53
- plot_model(st.session_state.best_model, plot='learning', display_format='streamlit')
54
-
55
- def visualize_clustering():
56
- col1, col2 = st.columns(2)
57
- with col1:
58
- plot_model(st.session_state.best_model, plot='cluster', display_format='streamlit')
59
- with col2:
60
- plot_model(st.session_state.best_model, plot='distribution', display_format='streamlit')
61
-
62
- col3, col4 = st.columns(2)
63
- with col3:
64
- plot_model(st.session_state.best_model, plot='elbow', display_format='streamlit')
65
- with col4:
66
- plot_model(st.session_state.best_model, plot='silhouette', display_format='streamlit')
67
-
68
- # Enhanced Context Generator
69
- def get_app_context():
70
- context = {
71
- "current_state": {
72
- "active_page": st.session_state.get('active_page', 'Data Upload'),
73
- "dataset_stats": {},
74
- "model_metrics": st.session_state.metrics,
75
- "problem_type": st.session_state.get('problem_type'),
76
- "target": st.session_state.get('target'),
77
- "best_model": str(st.session_state.get('best_model', None))
78
- },
79
- "app_capabilities": [
80
- "CSV data upload and statistical analysis",
81
- "Automated EDA report generation",
82
- "PyCaret-powered model training for classification, regression, and clustering",
83
- "Advanced model evaluation visualizations",
84
- "ML experiment tracking with MLflow",
85
- "AI-powered analysis through DeepSeek integration"
86
- ]
87
- }
88
-
89
- if 'df' in st.session_state:
90
- df = st.session_state.df
91
- context["current_state"]["dataset_stats"] = {
92
- "rows": df.shape[0],
93
- "columns": df.shape[1],
94
- "missing_values": df.isna().sum().sum(),
95
- "columns": {col: str(df[col].dtype) for col in df.columns}
96
- }
97
-
98
- return json.dumps(context)
99
 
100
- # Chatbot Handler
101
- def handle_ai_query(prompt):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  try:
103
- response = requests.post(
104
- "http://127.0.0.1:5001/analyze",
105
- json={
106
- "prompt": prompt,
107
- "context": get_app_context(),
108
- "metrics": st.session_state.metrics
109
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  )
111
- return response.json().get("analysis", "Error in analysis")
 
 
112
  except Exception as e:
113
- return f"Analysis error: {str(e)}"
114
 
115
- # Main App Components
116
- def data_upload_page():
117
- st.title("📤 Data Upload & Analysis")
118
- uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
119
-
120
- if uploaded_file:
121
- df = pd.read_csv(uploaded_file)
122
- st.session_state.df = df
123
- st.session_state.metrics = {}
124
-
125
- st.subheader("Dataset Health Check")
126
- col1, col2, col3 = st.columns(3)
127
- col1.metric("Total Samples", df.shape[0])
128
- col2.metric("Features", df.shape[1])
129
- col3.metric("Missing Values", df.isna().sum().sum())
130
-
131
- if st.button("Generate Full Profile Report"):
132
- with st.spinner("Generating report..."):
133
- pr = ProfileReport(df, explorative=True)
134
- st_profile_report(pr)
135
 
136
- def model_training_page():
137
- st.title("🧠 Model Training Studio")
138
-
139
- if 'df' not in st.session_state:
140
- st.warning("Upload data first!")
141
- return
142
-
143
- df = st.session_state.df
144
- problem_type = st.selectbox("Select Problem Type",
145
- ["Classification", "Regression", "Clustering"])
146
-
147
- if problem_type != "Clustering":
148
- target = st.selectbox("Select Target Variable", df.columns)
149
- st.session_state.target = target
150
-
151
- if st.button("Initialize Training Environment"):
152
- with st.spinner("Configuring PyCaret..."):
153
- if problem_type == "Classification":
154
- classification_setup(df, target=target, session_id=42)
155
- elif problem_type == "Regression":
156
- regression_setup(df, target=target, session_id=42)
157
  else:
158
- clustering_setup(df, session_id=42)
159
- st.session_state.problem_type = problem_type
160
- st.success("Environment ready for modeling!")
161
-
162
- if 'problem_type' in st.session_state:
163
- st.subheader("Model Training Dashboard")
164
- if st.session_state.problem_type in ["Classification", "Regression"]:
165
- compare_models = st.checkbox("Compare Multiple Models", True)
166
- n_models = st.slider("Number of Models", 1, 15, 5) if compare_models else 1
167
-
168
- if st.button("Start Training"):
169
- with st.spinner("Training in progress..."):
170
- if compare_models:
171
- models = compare_models(n_select=n_models)
172
- st.session_state.best_model = models[0]
173
- else:
174
- st.session_state.best_model = create_model()
175
-
176
- # Capture metrics
177
- results = pull()
178
- st.session_state.metrics = results.to_dict()
179
- st.success(f"Best Model: {st.session_state.best_model}")
180
-
181
- # Log to MLflow
182
- with mlflow.start_run():
183
- mlflow.log_metrics(results.iloc[0].to_dict())
184
- mlflow.sklearn.log_model(st.session_state.best_model, "model")
185
-
186
- def visualization_page():
187
- st.title("🔍 Model Evaluation Center")
188
-
189
- if 'best_model' not in st.session_state:
190
- st.warning("Train a model first!")
191
- return
192
-
193
- st.subheader("Performance Analysis")
194
-
195
- if st.session_state.problem_type == "Classification":
196
- visualize_classification()
197
- elif st.session_state.problem_type == "Regression":
198
- visualize_regression()
199
- else:
200
- visualize_clustering()
201
-
202
- st.subheader("Metric Analysis")
203
- st.dataframe(pd.DataFrame.from_dict(st.session_state.metrics))
204
-
205
- if st.button("Request AI Analysis"):
206
- analysis = handle_ai_query("Analyze these model metrics")
207
- st.markdown(f"**AI Analysis:**\n\n{analysis}")
208
 
209
- # Chatbot Interface
210
- def ai_assistant():
211
- st.markdown("---")
212
- st.subheader("🧠 Neural Insight Assistant")
213
-
214
- for msg in st.session_state.chat_history:
215
- st.chat_message(msg["role"]).write(msg["content"])
216
-
217
- if prompt := st.chat_input("Ask about models, data, or app usage"):
218
- st.session_state.chat_history.append({"role": "user", "content": prompt})
219
- st.chat_message("user").write(prompt)
220
-
221
- response = handle_ai_query(prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
- st.session_state.chat_history.append({"role": "assistant", "content": response})
224
- st.chat_message("assistant").write(response)
225
-
226
- # App Layout
227
- with st.sidebar:
228
- st.title("🔮 Neural-Vision Enhanced")
229
- page = st.selectbox("Navigation", [
230
- "Data Upload & Analysis",
231
- "Model Training Studio",
232
- "Model Evaluation Center"
233
- ])
234
- st.session_state.active_page = page
235
- st.markdown("---")
236
- st.markdown("**DeepSeek API Key**")
237
- os.environ["DEEPSEEK_API_KEY"] = st.text_input(
238
- "Enter API Key:", type="password",
239
- help="Required for AI analysis features"
240
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  st.markdown("---")
242
- st.markdown("v4.0 | © 2025 Neural-Vision")
 
 
 
243
 
244
- # Page Routing
245
- if "Data Upload & Analysis" in page:
246
- data_upload_page()
247
- elif "Model Training Studio" in page:
248
- model_training_page()
249
- else:
250
- visualization_page()
251
 
252
- ai_assistant()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app_combined.py
2
  import streamlit as st
3
  import pandas as pd
 
4
  import numpy as np
5
+ import plotly.express as px
6
+ import plotly.graph_objects as go
 
7
  from ydata_profiling import ProfileReport
8
  from streamlit_pandas_profiling import st_profile_report
 
9
  import requests
10
  import json
11
+ from datetime import datetime
12
+ import re
13
+ import tempfile
14
+ from scipy import stats
15
+ from sklearn.impute import SimpleImputer
16
+ from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
17
+ from sklearn.decomposition import PCA
18
+ import streamlit.components.v1 as components
19
+ from io import StringIO
20
+ from dotenv import load_dotenv
21
+ from flask import Flask, request, jsonify
22
+ from flask_cors import CORS
23
+ import openai
24
  import os
25
 
26
+ # Load environment variables
27
+ load_dotenv()
28
+
29
+ # Flask server setup
30
+ app = Flask(__name__)
31
+ CORS(app)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ # Configure DeepSeek API
34
+ openai.api_key = os.getenv("DEEPSEEK_API_KEY")
35
+ openai.api_base = "https://api.deepseek.com/v1"
36
+
37
+ # System prompt for the AI assistant
38
+ SYSTEM_PROMPT = '''
39
+ You are Neural Analyst, an AI assistant for the Neural-Vision Enhanced analytics platform.
40
+ Your capabilities include:
41
+ 1. Explaining model metrics and evaluation visualizations
42
+ 2. Interpreting dataset statistics and EDA reports
43
+ 3. Guiding users through app functionality
44
+ 4. Providing data science insights
45
+ 5. Comparing different model performances
46
+ Always consider:
47
+ - Current dataset statistics: {dataset_stats}
48
+ - Active problem type: {problem_type}
49
+ - Model metrics: {metrics}
50
+ - App state: {active_page}
51
+ '''
52
+
53
+ @app.route('/analyze', methods=['POST'])
54
+ def analyze():
55
  try:
56
+ data = request.json
57
+ context = json.loads(data['context'])
58
+
59
+ # Construct the prompt for DeepSeek
60
+ prompt = f'''
61
+ User Query: {data['prompt']}
62
+
63
+ Current Context:
64
+ - Active Page: {context['current_state']['active_page']}
65
+ - Problem Type: {context['current_state']['problem_type']}
66
+ - Target Variable: {context['current_state']['target']}
67
+ - Dataset Shape: {context['current_state']['dataset_stats'].get('rows', 0)} rows,
68
+ {context['current_state']['dataset_stats'].get('columns', 0)} columns
69
+ - Model Metrics: {json.dumps(context['current_state']['model_metrics'])}
70
+ '''
71
+
72
+ # Call DeepSeek API
73
+ response = openai.ChatCompletion.create(
74
+ model="deepseek-chat",
75
+ messages=[{
76
+ "role": "system",
77
+ "content": SYSTEM_PROMPT.format(**context['current_state'])
78
+ }, {
79
+ "role": "user",
80
+ "content": prompt
81
+ }],
82
+ temperature=0.3,
83
+ max_tokens=500
84
  )
85
+
86
+ return jsonify({"analysis": response.choices[0].message.content})
87
+
88
  except Exception as e:
89
+ return jsonify({"error": str(e)}), 500
90
 
91
+ # Streamlit app
92
+ def run_streamlit_app():
93
+ # Flask server URL
94
+ FLASK_URL = "http://localhost:5000/analyze"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
+ # Helper Functions
97
+ def enhance_section_title(title):
98
+ st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{title}</h2>", unsafe_allow_html=True)
99
+
100
+ def convert_csv_to_json_and_text(df):
101
+ """Convert DataFrame to JSON and then to plain text."""
102
+ json_data = df.to_json(orient="records")
103
+ data_dict = json.loads(json_data)
104
+
105
+ text_summary = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
106
+ text_summary += f"Missing Values: {df.isna().sum().sum()}\n"
107
+ text_summary += "Columns:\n"
108
+ for col in df.columns:
109
+ text_summary += f"- {col} ({df[col].dtype}): "
110
+ if pd.api.types.is_numeric_dtype(df[col]):
111
+ text_summary += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
 
 
 
 
 
112
  else:
113
+ text_summary += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
114
+ text_summary += f", Missing={df[col].isna().sum()}\n"
115
+ return text_summary
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
+ def get_chatbot_response(user_input, app_mode, dataset_text=""):
118
+ """Send request to Flask server for chatbot response."""
119
+ payload = {
120
+ "user_input": user_input,
121
+ "app_mode": app_mode,
122
+ "dataset_text": dataset_text
123
+ }
124
+ try:
125
+ response = requests.post(FLASK_URL, json=payload)
126
+ response.raise_for_status()
127
+ return response.json().get("response", "Error: No response from server")
128
+ except requests.exceptions.RequestException as e:
129
+ return f"Error: Could not connect to Flask server. {str(e)}"
130
+
131
+ # Sidebar Navigation
132
+ with st.sidebar:
133
+ st.title("🔮 Data-Vision Pro")
134
+ st.markdown("Your AI-powered data analysis suite.")
135
+ st.markdown("---")
136
+ app_mode = st.selectbox(
137
+ "Navigation",
138
+ ["Data Upload", "Data Cleaning", "EDA"],
139
+ format_func=lambda x: f"📌 {x}"
140
+ )
141
+ if app_mode == "Data Upload":
142
+ st.info("⬆️ Upload your CSV or XLSX dataset to begin.")
143
+ elif app_mode == "Data Cleaning":
144
+ st.info("🧹 Clean and preprocess your data using various tools.")
145
+ elif app_mode == "EDA":
146
+ st.info("🔍 Explore your data visually and statistically.")
147
+
148
+ st.markdown("---")
149
+ st.markdown("**Note**: Requires `ydata-profiling`, `requests`, `flask`. Install via `pip install ydata-profiling requests flask`.")
150
+ if 'cleaned_data' in st.session_state:
151
+ csv = st.session_state.cleaned_data.to_csv(index=False)
152
+ st.download_button(
153
+ label="Download Cleaned Data as CSV",
154
+ data=csv,
155
+ file_name='cleaned_data.csv',
156
+ mime='text/csv',
157
+ )
158
+ st.markdown("Created by Calvin Allen-Crawford")
159
+ st.markdown("v1.0 | © 2025")
160
+
161
+ # Main App Pages
162
+ if app_mode == "Data Upload":
163
+ st.title("📤 Data Upload & Analysis")
164
+ uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
165
 
166
+ if uploaded_file:
167
+ try:
168
+ df = pd.read_csv(uploaded_file)
169
+ st.session_state.df = df
170
+ st.session_state.metrics = {}
171
+
172
+ st.subheader("Dataset Health Check")
173
+ col1, col2, col3 = st.columns(3)
174
+ col1.metric("Total Samples", df.shape[0])
175
+ col2.metric("Features", df.shape[1])
176
+ col3.metric("Missing Values", df.isna().sum().sum())
177
+
178
+ if st.button("Generate Full Profile Report"):
179
+ with st.spinner("Generating report..."):
180
+ pr = ProfileReport(df, explorative=True)
181
+ st_profile_report(pr)
182
+ except Exception as e:
183
+ st.error(f"Error reading the file: {str(e)}")
184
+
185
+ elif app_mode == "Data Cleaning":
186
+ st.title("🧹 Smart Data Cleaning")
187
+ st.header("Preprocess and Transform Your Data")
188
+ if 'raw_data' not in st.session_state:
189
+ st.warning("Please upload data first in the Data Upload section.")
190
+ st.stop()
191
+ if 'cleaned_data' not in st.session_state:
192
+ st.session_state.cleaned_data = st.session_state.raw_data.copy()
193
+ df = st.session_state.cleaned_data.copy()
194
+
195
+ enhance_section_title("📊 Data Health Dashboard")
196
+ with st.expander("Explore Data Health Metrics", expanded=True):
197
+ col1, col2, col3 = st.columns(3)
198
+ with col1: st.metric("Columns", len(df.columns))
199
+ with col2: st.metric("Rows", len(df))
200
+ with col3: st.metric("Missing Values", df.isna().sum().sum())
201
+ if st.button("Generate Detailed Health Report"):
202
+ with st.spinner("Generating report..."):
203
+ profile = ProfileReport(df, minimal=True)
204
+ st_profile_report(profile)
205
+ if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
206
+ if st.button("Undo Last Action"):
207
+ st.session_state.data_versions.pop()
208
+ st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
209
+ st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
210
+ st.rerun()
211
+
212
+ elif app_mode == "EDA":
213
+ st.title("🔍 Interactive Data Explorer")
214
+ if 'cleaned_data' not in st.session_state:
215
+ st.warning("Please upload and clean data first.")
216
+ st.stop()
217
+ df = st.session_state.cleaned_data.copy()
218
+
219
+ enhance_section_title("Dataset Overview")
220
+ with st.container():
221
+ col1, col2, col3, col4 = st.columns(4)
222
+ col1.metric("Total Rows", df.shape[0])
223
+ col2.metric("Total Columns", df.shape[1])
224
+ missing_percentage = df.isna().sum().sum() / df.size * 100
225
+ col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
226
+ col4.metric("Duplicates", df.duplicated().sum())
227
+
228
+ # Chatbot Section
229
  st.markdown("---")
230
+ st.subheader("💬 AI Chatbot Assistant")
231
+ st.info("Ask me about the app or your data! Try: 'What can I do here?' or 'What's in the dataset?'")
232
+ if "chat_history" not in st.session_state:
233
+ st.session_state.chat_history = []
234
 
235
+ for message in st.session_state.chat_history:
236
+ with st.chat_message(message["role"]):
237
+ st.markdown(message["content"])
 
 
 
 
238
 
239
+ user_input = st.chat_input("Ask me anything about the app or your data...")
240
+ if user_input:
241
+ st.session_state.chat_history.append({"role": "user", "content": user_input})
242
+ with st.chat_message("user"):
243
+ st.markdown(user_input)
244
+
245
+ with st.spinner("Thinking..."):
246
+ dataset_text = st.session_state.get("dataset_text", "")
247
+ response = get_chatbot_response(user_input, app_mode, dataset_text)
248
+ st.session_state.chat_history.append({"role": "assistant", "content": response})
249
+ with st.chat_message("assistant"):
250
+ st.markdown(response)
251
+
252
+ if __name__ == '__main__':
253
+ # Run Flask server in a separate thread
254
+ from threading import Thread
255
+ flask_thread = Thread(target=lambda: app.run(host='0.0.0.0', port=5000))
256
+ flask_thread.start()
257
+
258
+ # Run Streamlit app
259
+ run_streamlit_app()