Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,259 +1,225 @@
|
|
1 |
-
# app_combined.py
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
-
import numpy as np
|
5 |
import plotly.express as px
|
6 |
-
import
|
|
|
|
|
|
|
7 |
from ydata_profiling import ProfileReport
|
8 |
from streamlit_pandas_profiling import st_profile_report
|
|
|
9 |
import requests
|
10 |
import json
|
11 |
-
from datetime import datetime
|
12 |
-
import re
|
13 |
-
import tempfile
|
14 |
-
from scipy import stats
|
15 |
-
from sklearn.impute import SimpleImputer
|
16 |
-
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
|
17 |
-
from sklearn.decomposition import PCA
|
18 |
-
import streamlit.components.v1 as components
|
19 |
-
from io import StringIO
|
20 |
-
from dotenv import load_dotenv
|
21 |
-
from flask import Flask, request, jsonify
|
22 |
-
from flask_cors import CORS
|
23 |
-
import openai
|
24 |
import os
|
25 |
|
26 |
-
#
|
27 |
-
|
28 |
-
|
29 |
-
#
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
#
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
#
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
def
|
55 |
try:
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
Current Context:
|
64 |
-
- Active Page: {context['current_state']['active_page']}
|
65 |
-
- Problem Type: {context['current_state']['problem_type']}
|
66 |
-
- Target Variable: {context['current_state']['target']}
|
67 |
-
- Dataset Shape: {context['current_state']['dataset_stats'].get('rows', 0)} rows,
|
68 |
-
{context['current_state']['dataset_stats'].get('columns', 0)} columns
|
69 |
-
- Model Metrics: {json.dumps(context['current_state']['model_metrics'])}
|
70 |
-
'''
|
71 |
-
|
72 |
-
# Call DeepSeek API
|
73 |
-
response = openai.ChatCompletion.create(
|
74 |
-
model="deepseek-chat",
|
75 |
-
messages=[{
|
76 |
-
"role": "system",
|
77 |
-
"content": SYSTEM_PROMPT.format(**context['current_state'])
|
78 |
-
}, {
|
79 |
-
"role": "user",
|
80 |
-
"content": prompt
|
81 |
-
}],
|
82 |
-
temperature=0.3,
|
83 |
-
max_tokens=500
|
84 |
)
|
85 |
-
|
86 |
-
return jsonify({"analysis": response.choices[0].message.content})
|
87 |
-
|
88 |
except Exception as e:
|
89 |
-
return
|
90 |
-
|
91 |
-
# Streamlit app
|
92 |
-
def run_streamlit_app():
|
93 |
-
# Flask server URL
|
94 |
-
FLASK_URL = "http://localhost:5000/analyze"
|
95 |
-
|
96 |
-
# Helper Functions
|
97 |
-
def enhance_section_title(title):
|
98 |
-
st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{title}</h2>", unsafe_allow_html=True)
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
text_summary += f"- {col} ({df[col].dtype}): "
|
110 |
-
if pd.api.types.is_numeric_dtype(df[col]):
|
111 |
-
text_summary += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
|
112 |
-
else:
|
113 |
-
text_summary += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
|
114 |
-
text_summary += f", Missing={df[col].isna().sum()}\n"
|
115 |
-
return text_summary
|
116 |
-
|
117 |
-
def get_chatbot_response(user_input, app_mode, dataset_text=""):
|
118 |
-
"""Send request to Flask server for chatbot response."""
|
119 |
-
payload = {
|
120 |
-
"user_input": user_input,
|
121 |
-
"app_mode": app_mode,
|
122 |
-
"dataset_text": dataset_text
|
123 |
-
}
|
124 |
-
try:
|
125 |
-
response = requests.post(FLASK_URL, json=payload)
|
126 |
-
response.raise_for_status()
|
127 |
-
return response.json().get("response", "Error: No response from server")
|
128 |
-
except requests.exceptions.RequestException as e:
|
129 |
-
return f"Error: Could not connect to Flask server. {str(e)}"
|
130 |
-
|
131 |
-
# Sidebar Navigation
|
132 |
-
with st.sidebar:
|
133 |
-
st.title("🔮 Data-Vision Pro")
|
134 |
-
st.markdown("Your AI-powered data analysis suite.")
|
135 |
-
st.markdown("---")
|
136 |
-
app_mode = st.selectbox(
|
137 |
-
"Navigation",
|
138 |
-
["Data Upload", "Data Cleaning", "EDA"],
|
139 |
-
format_func=lambda x: f"📌 {x}"
|
140 |
-
)
|
141 |
-
if app_mode == "Data Upload":
|
142 |
-
st.info("⬆️ Upload your CSV or XLSX dataset to begin.")
|
143 |
-
elif app_mode == "Data Cleaning":
|
144 |
-
st.info("🧹 Clean and preprocess your data using various tools.")
|
145 |
-
elif app_mode == "EDA":
|
146 |
-
st.info("🔍 Explore your data visually and statistically.")
|
147 |
-
|
148 |
-
st.markdown("---")
|
149 |
-
st.markdown("**Note**: Requires `ydata-profiling`, `requests`, `flask`. Install via `pip install ydata-profiling requests flask`.")
|
150 |
-
if 'cleaned_data' in st.session_state:
|
151 |
-
csv = st.session_state.cleaned_data.to_csv(index=False)
|
152 |
-
st.download_button(
|
153 |
-
label="Download Cleaned Data as CSV",
|
154 |
-
data=csv,
|
155 |
-
file_name='cleaned_data.csv',
|
156 |
-
mime='text/csv',
|
157 |
-
)
|
158 |
-
st.markdown("Created by Calvin Allen-Crawford")
|
159 |
-
st.markdown("v1.0 | © 2025")
|
160 |
-
|
161 |
-
# Main App Pages
|
162 |
-
if app_mode == "Data Upload":
|
163 |
-
st.title("📤 Data Upload & Analysis")
|
164 |
-
uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])
|
165 |
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
col3.metric("Missing Values", df.isna().sum().sum())
|
177 |
-
|
178 |
-
if st.button("Generate Full Profile Report"):
|
179 |
-
with st.spinner("Generating report..."):
|
180 |
-
pr = ProfileReport(df, explorative=True)
|
181 |
-
st_profile_report(pr)
|
182 |
-
except Exception as e:
|
183 |
-
st.error(f"Error reading the file: {str(e)}")
|
184 |
-
|
185 |
-
elif app_mode == "Data Cleaning":
|
186 |
-
st.title("🧹 Smart Data Cleaning")
|
187 |
-
st.header("Preprocess and Transform Your Data")
|
188 |
-
if 'raw_data' not in st.session_state:
|
189 |
-
st.warning("Please upload data first in the Data Upload section.")
|
190 |
-
st.stop()
|
191 |
-
if 'cleaned_data' not in st.session_state:
|
192 |
-
st.session_state.cleaned_data = st.session_state.raw_data.copy()
|
193 |
-
df = st.session_state.cleaned_data.copy()
|
194 |
-
|
195 |
-
enhance_section_title("📊 Data Health Dashboard")
|
196 |
-
with st.expander("Explore Data Health Metrics", expanded=True):
|
197 |
-
col1, col2, col3 = st.columns(3)
|
198 |
-
with col1: st.metric("Columns", len(df.columns))
|
199 |
-
with col2: st.metric("Rows", len(df))
|
200 |
-
with col3: st.metric("Missing Values", df.isna().sum().sum())
|
201 |
-
if st.button("Generate Detailed Health Report"):
|
202 |
-
with st.spinner("Generating report..."):
|
203 |
-
profile = ProfileReport(df, minimal=True)
|
204 |
-
st_profile_report(profile)
|
205 |
-
if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
|
206 |
-
if st.button("Undo Last Action"):
|
207 |
-
st.session_state.data_versions.pop()
|
208 |
-
st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
|
209 |
-
st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
|
210 |
-
st.rerun()
|
211 |
-
|
212 |
-
elif app_mode == "EDA":
|
213 |
-
st.title("🔍 Interactive Data Explorer")
|
214 |
-
if 'cleaned_data' not in st.session_state:
|
215 |
-
st.warning("Please upload and clean data first.")
|
216 |
-
st.stop()
|
217 |
-
df = st.session_state.cleaned_data.copy()
|
218 |
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
|
228 |
-
|
|
|
229 |
st.markdown("---")
|
230 |
-
st.subheader("
|
231 |
-
|
232 |
-
|
233 |
-
st.
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
st.session_state.chat_history.append({"role": "
|
242 |
-
|
243 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
|
|
251 |
|
252 |
-
|
253 |
-
# Run Flask server in a separate thread
|
254 |
-
from threading import Thread
|
255 |
-
flask_thread = Thread(target=lambda: app.run(host='0.0.0.0', port=5000))
|
256 |
-
flask_thread.start()
|
257 |
-
|
258 |
-
# Run Streamlit app
|
259 |
-
run_streamlit_app()
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
import plotly.express as px
|
4 |
+
import numpy as np
|
5 |
+
from pycaret.classification import *
|
6 |
+
from pycaret.regression import *
|
7 |
+
from pycaret.clustering import *
|
8 |
from ydata_profiling import ProfileReport
|
9 |
from streamlit_pandas_profiling import st_profile_report
|
10 |
+
import mlflow
|
11 |
import requests
|
12 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
import os
|
14 |
|
15 |
+
# Browser tab title and wide layout for the whole app.
st.set_page_config(layout="wide", page_title="Neural-Vision Enhanced")

# Send MLflow runs to the local tracking server, under this app's experiment.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Neural-Vision Enhanced")

# Seed the session keys that the rest of the script reads without checking;
# values already present from an earlier rerun are left alone.
if 'metrics' not in st.session_state:
    st.session_state['metrics'] = {}
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = []
|
25 |
+
|
26 |
+
# Enhanced Visualization Functions
|
27 |
+
def visualize_model(model, plots):
    """Render one PyCaret diagnostic plot per Streamlit column.

    Args:
        model: Trained estimator to plot.
        plots: Sequence of PyCaret plot identifiers (e.g. ``['auc', 'pr']``);
            one column is created per entry. An empty sequence draws nothing.

    The file star-imports ``pycaret.classification``, ``pycaret.regression``
    and ``pycaret.clustering`` in that order, and each exports ``plot_model``,
    so the bare name always refers to the last one imported. The plotter is
    therefore taken from the module matching
    ``st.session_state.problem_type``; the bare global is only a fallback for
    when no problem type has been recorded.
    """
    import importlib

    if not plots:
        # st.columns() rejects a zero-column spec, and there is nothing to draw.
        return

    task_modules = {
        "Classification": "pycaret.classification",
        "Regression": "pycaret.regression",
        "Clustering": "pycaret.clustering",
    }
    module_name = task_modules.get(st.session_state.get('problem_type'))
    if module_name:
        plotter = importlib.import_module(module_name).plot_model
    else:
        plotter = plot_model

    for column, plot_name in zip(st.columns(len(plots)), plots):
        with column:
            plotter(model, plot=plot_name, display_format='streamlit')
|
32 |
+
|
33 |
+
def visualize_classification():
    """Plot the classification diagnostics for the model held in session state."""
    classification_plots = ['confusion_matrix', 'auc', 'feature', 'pr']
    visualize_model(st.session_state.best_model, classification_plots)
|
35 |
+
|
36 |
+
def visualize_regression():
    """Plot the regression diagnostics for the model held in session state."""
    regression_plots = ['residuals', 'error', 'cooks', 'learning']
    visualize_model(st.session_state.best_model, regression_plots)
|
38 |
+
|
39 |
+
def visualize_clustering():
    """Plot the clustering diagnostics for the model held in session state."""
    clustering_plots = ['cluster', 'distribution', 'elbow', 'silhouette']
    visualize_model(st.session_state.best_model, clustering_plots)
|
41 |
+
|
42 |
+
# Enhanced Context Generator
|
43 |
+
def get_app_context():
    """Serialize a snapshot of the app's state for the AI assistant.

    Returns:
        str: A JSON document with two top-level keys. ``current_state`` holds
        the active page, dataset statistics (empty when no dataset has been
        uploaded), stored model metrics, problem type, target column and the
        ``str()`` of the best model. ``app_capabilities`` is a fixed list of
        feature descriptions.
    """
    df_stats = {}
    if 'df' in st.session_state:
        df = st.session_state.df
        df_stats = {
            "rows": int(df.shape[0]),
            "columns": int(df.shape[1]),
            # The double sum yields a NumPy integer, which json.dumps cannot
            # encode; convert it to a built-in int.
            "missing_values": int(df.isna().sum().sum()),
            # This map used to be a second "columns" key in the same literal,
            # which silently replaced the column count above.
            "column_types": {
                str(name): str(dtype) for name, dtype in df.dtypes.items()
            },
        }

    context = {
        "current_state": {
            "active_page": st.session_state.get('active_page', 'Data Upload'),
            "dataset_stats": df_stats,
            "model_metrics": st.session_state.metrics,
            "problem_type": st.session_state.get('problem_type'),
            "target": st.session_state.get('target'),
            # Stringified so an arbitrary estimator object stays serializable;
            # yields the literal "None" when no model has been trained.
            "best_model": str(st.session_state.get('best_model', None)),
        },
        "app_capabilities": [
            "CSV data upload and statistical analysis",
            "Automated EDA report generation",
            "PyCaret-powered model training for classification, regression, and clustering",
            "Advanced model evaluation visualizations",
            "ML experiment tracking with MLflow",
            "AI-powered analysis through DeepSeek integration",
        ],
    }

    return json.dumps(context)
|
74 |
|
75 |
+
# Chatbot Handler
|
76 |
+
def handle_ai_query(prompt):
    """Send ``prompt`` and the current app context to the analysis service.

    Args:
        prompt: Free-text question or instruction from the user.

    Returns:
        str: The ``analysis`` field of the service's JSON reply;
        ``"Error in analysis"`` when the reply has no such field; or
        ``"Analysis error: <details>"`` when the request fails for any reason
        (connection problem, timeout, HTTP error status, invalid JSON, ...).
        This function never raises.
    """
    try:
        response = requests.post(
            "http://127.0.0.1:5001/analyze",
            json={
                "prompt": prompt,
                "context": get_app_context(),
                "metrics": st.session_state.metrics,
            },
            # (connect, read) seconds. Without a timeout a stalled service
            # would block the Streamlit script run indefinitely.
            timeout=(5, 60),
        )
        # Report 4xx/5xx replies as errors instead of parsing their bodies.
        response.raise_for_status()
        return response.json().get("analysis", "Error in analysis")
    except Exception as e:
        # Deliberately broad: this is a UI boundary and the caller renders the
        # returned text directly, so every failure becomes a message.
        return f"Analysis error: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
+
# Main App Components
|
91 |
+
def data_upload_page():
    """Render the upload page: ingest a CSV and show basic health metrics.

    On a successful upload the parsed frame is stored in
    ``st.session_state.df`` and ``st.session_state.metrics`` is reset to an
    empty dict. If the file cannot be parsed, an error is shown and the
    session state is left untouched.
    """
    st.title("📤 Data Upload & Analysis")
    uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])

    if not uploaded_file:
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        # Empty, malformed or non-UTF-8 uploads are user errors, not crashes.
        st.error(f"Error reading the file: {exc}")
        return

    st.session_state.df = df
    st.session_state.metrics = {}

    st.subheader("Dataset Health Check")
    col1, col2, col3 = st.columns(3)
    col1.metric("Total Samples", df.shape[0])
    col2.metric("Features", df.shape[1])
    col3.metric("Missing Values", df.isna().sum().sum())

    if st.button("Generate Full EDA Report"):
        with st.spinner("Generating comprehensive analysis..."):
            profile = ProfileReport(df, explorative=True)
            st_profile_report(profile)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
+
def model_training_page():
    """Render the training page: configure PyCaret, train, and log to MLflow.

    Flow:
        1. Pick a problem type (and a target column unless clustering).
        2. "Initialize Training Environment" runs the matching PyCaret
           ``setup`` and records the type in ``st.session_state.problem_type``.
        3. "Start Training" trains with the backend recorded in step 2, stores
           the winner in ``st.session_state.best_model`` and the score grid in
           ``st.session_state.metrics``, then logs both to MLflow.

    The PyCaret task modules are imported here under distinct names because
    the file's three ``from pycaret.<task> import *`` lines overwrite each
    other's ``setup`` / ``compare_models`` / ``create_model`` / ``pull``.
    """
    import pycaret.classification as classification_backend
    import pycaret.clustering as clustering_backend
    import pycaret.regression as regression_backend

    backends = {
        "Classification": classification_backend,
        "Regression": regression_backend,
        "Clustering": clustering_backend,
    }

    st.title("🧠 Model Training Studio")

    if 'df' not in st.session_state:
        st.warning("Upload data first!")
        return

    df = st.session_state.df
    problem_type = st.selectbox("Select Problem Type", ["Classification", "Regression", "Clustering"])

    if problem_type != "Clustering":
        st.session_state.target = st.selectbox("Select Target Variable", df.columns)

    if st.button("Initialize Training Environment"):
        with st.spinner("Configuring PyCaret..."):
            setup_kwargs = {"session_id": 42}
            if problem_type != "Clustering":
                # Clustering is unsupervised: its setup() takes no target.
                setup_kwargs["target"] = st.session_state.target
            backends[problem_type].setup(df, **setup_kwargs)
            st.session_state.problem_type = problem_type
            st.success("Environment ready for modeling!")

    if 'problem_type' not in st.session_state:
        return

    st.subheader("Model Training Dashboard")
    # Train with the backend that was actually initialized, which may differ
    # from whatever the select box currently shows.
    active_type = st.session_state.problem_type
    backend = backends[active_type]
    supervised = active_type in ("Classification", "Regression")

    # Named so it does not shadow PyCaret's compare_models() function.
    compare_enabled = False
    n_models = 1
    if supervised:
        compare_enabled = st.checkbox("Compare Multiple Models", True)
        n_models = st.slider("Number of Models", 1, 15, 5) if compare_enabled else 1

    if not st.button("Start Training"):
        return

    with st.spinner("Training in progress..."):
        if compare_enabled:
            ranked = backend.compare_models(n_select=n_models)
            # n_select=1 yields a bare estimator rather than a list.
            if isinstance(ranked, list):
                if not ranked:
                    st.error("No model could be trained on this dataset.")
                    return
                best = ranked[0]
            else:
                best = ranked
        elif supervised:
            # create_model() requires an estimator id. "lr" exists in both
            # supervised modules (logistic / linear regression) and serves as
            # the single-model baseline.
            best = backend.create_model("lr")
        else:
            # Baseline clusterer so the evaluation page has a model to plot.
            best = backend.create_model("kmeans")
        st.session_state.best_model = best

        # Capture the score grid produced by the call above.
        results = backend.pull()
        st.session_state.metrics = results.to_dict()
    st.success(f"Best Model: {st.session_state.best_model}")

    # Log to MLflow
    with mlflow.start_run():
        loggable = _mlflow_safe_metrics(results)
        if loggable:
            mlflow.log_metrics(loggable)
        mlflow.sklearn.log_model(st.session_state.best_model, "model")


def _mlflow_safe_metrics(results):
    """Return ``{name: float}`` metrics from a score grid that MLflow accepts.

    Non-numeric cells (such as a model-name column) and NaNs are dropped, and
    characters MLflow does not allow in metric names are replaced with ``_``.
    """
    import math
    import numbers
    import re

    if results.empty:
        return {}

    # Prefer a cross-validation "Mean" row when the grid has one; otherwise
    # take the first row (presumably the top-ranked model - verify against
    # the PyCaret version in use).
    row = results.loc["Mean"] if "Mean" in results.index else results.iloc[0]

    safe = {}
    for name, value in row.items():
        if isinstance(value, bool) or not isinstance(value, numbers.Real):
            continue
        number = float(value)
        if math.isnan(number):
            continue
        safe[re.sub(r"[^\w\-. /]", "_", str(name))] = number
    return safe
|
158 |
+
|
159 |
+
def visualization_page():
    """Render the evaluation page for the trained model held in session state.

    Shows a warning and stops early when no model has been trained yet.
    Otherwise draws the plots for the recorded problem type, tabulates the
    stored metrics, and offers an on-demand AI commentary.
    """
    st.title("🔍 Model Evaluation Center")

    state = st.session_state
    if 'best_model' not in state:
        st.warning("Train a model first!")
        return

    st.subheader("Performance Analysis")

    problem_type = state.problem_type
    if problem_type == "Classification":
        visualize_classification()
    elif problem_type == "Regression":
        visualize_regression()
    elif problem_type == "Clustering":
        visualize_clustering()
    else:
        # Same failure mode as an unknown key in a dispatch table.
        raise KeyError(problem_type)

    st.subheader("Metric Analysis")
    metrics_frame = pd.DataFrame.from_dict(state.metrics)
    st.dataframe(metrics_frame)

    if not st.button("Request AI Analysis"):
        return
    analysis = handle_ai_query("Analyze these model metrics")
    st.markdown(f"**AI Analysis:**\n\n{analysis}")
|
181 |
|
182 |
+
# Chatbot Interface
|
183 |
+
def ai_assistant():
    """Draw the chat panel: replay stored history, then handle a new prompt.

    A submitted prompt and the assistant's reply are both appended to
    ``st.session_state.chat_history`` and echoed to the page immediately.
    """
    st.markdown("---")
    st.subheader("🧠 Neural Insight Assistant")

    history = st.session_state.chat_history
    for entry in history:
        st.chat_message(entry["role"]).write(entry["content"])

    prompt = st.chat_input("Ask about models, data, or app usage")
    if not prompt:
        return

    history.append({"role": "user", "content": prompt})
    st.chat_message("user").write(prompt)

    reply = handle_ai_query(prompt)

    history.append({"role": "assistant", "content": reply})
    st.chat_message("assistant").write(reply)
|
198 |
+
|
199 |
+
# App Layout
|
200 |
+
with st.sidebar:
    st.title("🔮 Neural-Vision Enhanced")
    page = st.selectbox("Navigation", [
        "Data Upload & Analysis",
        "Model Training Studio",
        "Model Evaluation Center"
    ])
    # Recorded so get_app_context() can report which page is showing.
    st.session_state.active_page = page
    st.markdown("---")
    st.markdown("**DeepSeek API Key**")
    entered_api_key = st.text_input(
        "Enter API Key:", type="password",
        help="Required for AI analysis features"
    ).strip()
    # Only publish a key the user actually typed. The field is empty on every
    # rerun until filled in, and assigning that empty string would wipe a key
    # already configured in the server's environment.
    # NOTE(review): os.environ is process-wide, so a key entered here is
    # visible to every session served by this process.
    if entered_api_key:
        os.environ["DEEPSEEK_API_KEY"] = entered_api_key
    st.markdown("---")
    st.markdown("v4.0 | © 2025 Neural-Vision")
|
216 |
|
217 |
+
# Page Routing
|
218 |
+
# Route to the page picked in the sidebar. The first label contained in the
# selection wins; anything else falls through to the evaluation page.
_page_routes = (
    ("Data Upload & Analysis", data_upload_page),
    ("Model Training Studio", model_training_page),
)
_render_page = next(
    (handler for label, handler in _page_routes if label in page),
    visualization_page,
)
_render_page()

# The assistant panel is appended below whichever page was rendered.
ai_assistant()
|
|
|
|
|
|
|
|
|
|
|
|
|
|