Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,252 +1,259 @@
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
import plotly.express as px
|
4 |
import numpy as np
|
5 |
-
|
6 |
-
|
7 |
-
from pycaret.clustering import *
|
8 |
from ydata_profiling import ProfileReport
|
9 |
from streamlit_pandas_profiling import st_profile_report
|
10 |
-
import mlflow
|
11 |
import requests
|
12 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
import os
|
14 |
|
15 |
-
#
|
16 |
-
|
17 |
-
|
18 |
-
#
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
# Initialize session state
# Seed each key only when it is absent so existing values are left untouched.
for state_key, make_default in (('metrics', dict), ('chat_history', list)):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()
|
27 |
-
|
28 |
-
# Enhanced Visualization Functions
|
29 |
-
def visualize_classification():
    """Draw the classification plots for the stored best model.

    Plots are laid out as two rows of two Streamlit columns, in the order
    confusion matrix, AUC, feature, PR.
    """
    plot_rows = (
        ('confusion_matrix', 'auc'),
        ('feature', 'pr'),
    )
    for plot_row in plot_rows:
        # A fresh pair of columns per row keeps the rows stacked vertically.
        for slot, plot_name in zip(st.columns(2), plot_row):
            with slot:
                plot_model(st.session_state.best_model, plot=plot_name, display_format='streamlit')
|
41 |
-
|
42 |
-
def visualize_regression():
    """Draw the regression plots for the stored best model.

    Plots are laid out as two rows of two Streamlit columns, in the order
    residuals, error, cooks, learning.
    """
    def _render(slot, plot_name):
        # Draw one plot inside the given column container.
        with slot:
            plot_model(st.session_state.best_model, plot=plot_name, display_format='streamlit')

    top_left, top_right = st.columns(2)
    _render(top_left, 'residuals')
    _render(top_right, 'error')

    bottom_left, bottom_right = st.columns(2)
    _render(bottom_left, 'cooks')
    _render(bottom_right, 'learning')
|
54 |
-
|
55 |
-
def visualize_clustering():
    """Draw the clustering plots for the stored best model.

    Plots are laid out as two rows of two Streamlit columns, in the order
    cluster, distribution, elbow, silhouette.
    """
    plot_names = ['cluster', 'distribution', 'elbow', 'silhouette']
    per_row = 2
    for start in range(0, len(plot_names), per_row):
        slots = st.columns(per_row)
        for slot, plot_name in zip(slots, plot_names[start:start + per_row]):
            with slot:
                plot_model(st.session_state.best_model, plot=plot_name, display_format='streamlit')
|
67 |
-
|
68 |
-
# Enhanced Context Generator
|
69 |
-
def get_app_context():
    """Return the current app state as a JSON string.

    The payload has two top-level keys:

    * ``current_state`` -- active page, dataset statistics, model metrics,
      problem type, target and the string form of the best model, all read
      from ``st.session_state``.
    * ``app_capabilities`` -- a fixed list of feature descriptions.

    ``dataset_stats`` stays empty until a DataFrame is stored under
    ``st.session_state.df``. Once present it holds ``rows``, ``columns``
    (the column count), ``missing_values`` and ``column_types`` (column name
    -> dtype string).

    Returns:
        str: JSON-encoded context.
    """
    context = {
        "current_state": {
            "active_page": st.session_state.get('active_page', 'Data Upload'),
            "dataset_stats": {},
            "model_metrics": st.session_state.get('metrics', {}),
            "problem_type": st.session_state.get('problem_type'),
            "target": st.session_state.get('target'),
            "best_model": str(st.session_state.get('best_model', None)),
        },
        "app_capabilities": [
            "CSV data upload and statistical analysis",
            "Automated EDA report generation",
            "PyCaret-powered model training for classification, regression, and clustering",
            "Advanced model evaluation visualizations",
            "ML experiment tracking with MLflow",
            "AI-powered analysis through DeepSeek integration",
        ],
    }

    if 'df' in st.session_state:
        df = st.session_state.df
        context["current_state"]["dataset_stats"] = {
            # Cast to built-in int: pandas reductions return numpy integers,
            # which json.dumps cannot serialize.
            "rows": int(df.shape[0]),
            "columns": int(df.shape[1]),
            "missing_values": int(df.isna().sum().sum()),
            # Previously this map reused the "columns" key and silently
            # replaced the column count above; it now has its own key.
            "column_types": {str(name): str(dtype) for name, dtype in df.dtypes.items()},
        }

    return json.dumps(context)
|
99 |
|
100 |
-
#
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
try:
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
)
|
111 |
-
|
|
|
|
|
112 |
except Exception as e:
|
113 |
-
return
|
114 |
|
115 |
-
#
|
116 |
-
def
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
if uploaded_file:
|
121 |
-
df = pd.read_csv(uploaded_file)
|
122 |
-
st.session_state.df = df
|
123 |
-
st.session_state.metrics = {}
|
124 |
-
|
125 |
-
st.subheader("Dataset Health Check")
|
126 |
-
col1, col2, col3 = st.columns(3)
|
127 |
-
col1.metric("Total Samples", df.shape[0])
|
128 |
-
col2.metric("Features", df.shape[1])
|
129 |
-
col3.metric("Missing Values", df.isna().sum().sum())
|
130 |
-
|
131 |
-
if st.button("Generate Full Profile Report"):
|
132 |
-
with st.spinner("Generating report..."):
|
133 |
-
pr = ProfileReport(df, explorative=True)
|
134 |
-
st_profile_report(pr)
|
135 |
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
with st.spinner("Configuring PyCaret..."):
|
153 |
-
if problem_type == "Classification":
|
154 |
-
classification_setup(df, target=target, session_id=42)
|
155 |
-
elif problem_type == "Regression":
|
156 |
-
regression_setup(df, target=target, session_id=42)
|
157 |
else:
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
if 'problem_type' in st.session_state:
|
163 |
-
st.subheader("Model Training Dashboard")
|
164 |
-
if st.session_state.problem_type in ["Classification", "Regression"]:
|
165 |
-
compare_models = st.checkbox("Compare Multiple Models", True)
|
166 |
-
n_models = st.slider("Number of Models", 1, 15, 5) if compare_models else 1
|
167 |
-
|
168 |
-
if st.button("Start Training"):
|
169 |
-
with st.spinner("Training in progress..."):
|
170 |
-
if compare_models:
|
171 |
-
models = compare_models(n_select=n_models)
|
172 |
-
st.session_state.best_model = models[0]
|
173 |
-
else:
|
174 |
-
st.session_state.best_model = create_model()
|
175 |
-
|
176 |
-
# Capture metrics
|
177 |
-
results = pull()
|
178 |
-
st.session_state.metrics = results.to_dict()
|
179 |
-
st.success(f"Best Model: {st.session_state.best_model}")
|
180 |
-
|
181 |
-
# Log to MLflow
|
182 |
-
with mlflow.start_run():
|
183 |
-
mlflow.log_metrics(results.iloc[0].to_dict())
|
184 |
-
mlflow.sklearn.log_model(st.session_state.best_model, "model")
|
185 |
-
|
186 |
-
def visualization_page():
    """Render the model evaluation page.

    Shows a warning and returns early when no trained model is stored in the
    session. Otherwise draws the plots matching the stored problem type,
    tabulates the stored metrics and offers an on-demand AI analysis.
    """
    st.title("🔍 Model Evaluation Center")

    if 'best_model' not in st.session_state:
        st.warning("Train a model first!")
        return

    st.subheader("Performance Analysis")

    # Anything that is neither classification nor regression is plotted as
    # clustering.
    plotters = {
        "Classification": visualize_classification,
        "Regression": visualize_regression,
    }
    draw_plots = plotters.get(st.session_state.problem_type, visualize_clustering)
    draw_plots()

    st.subheader("Metric Analysis")
    metrics_table = pd.DataFrame.from_dict(st.session_state.metrics)
    st.dataframe(metrics_table)

    wants_analysis = st.button("Request AI Analysis")
    if wants_analysis:
        analysis = handle_ai_query("Analyze these model metrics")
        st.markdown(f"**AI Analysis:**\n\n{analysis}")
|
208 |
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
st.markdown("---")
|
242 |
-
st.
|
|
|
|
|
|
|
243 |
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
elif "Model Training Studio" in page:
|
248 |
-
model_training_page()
|
249 |
-
else:
|
250 |
-
visualization_page()
|
251 |
|
252 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# app_combined.py
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
|
|
4 |
import numpy as np
|
5 |
+
import plotly.express as px
|
6 |
+
import plotly.graph_objects as go
|
|
|
7 |
from ydata_profiling import ProfileReport
|
8 |
from streamlit_pandas_profiling import st_profile_report
|
|
|
9 |
import requests
|
10 |
import json
|
11 |
+
from datetime import datetime
|
12 |
+
import re
|
13 |
+
import tempfile
|
14 |
+
from scipy import stats
|
15 |
+
from sklearn.impute import SimpleImputer
|
16 |
+
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
|
17 |
+
from sklearn.decomposition import PCA
|
18 |
+
import streamlit.components.v1 as components
|
19 |
+
from io import StringIO
|
20 |
+
from dotenv import load_dotenv
|
21 |
+
from flask import Flask, request, jsonify
|
22 |
+
from flask_cors import CORS
|
23 |
+
import openai
|
24 |
import os
|
25 |
|
26 |
+
# Load environment variables
|
27 |
+
load_dotenv()
|
28 |
+
|
29 |
+
# Flask server setup
|
30 |
+
app = Flask(__name__)
|
31 |
+
CORS(app)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
+
# Configure DeepSeek API
|
34 |
+
openai.api_key = os.getenv("DEEPSEEK_API_KEY")
|
35 |
+
openai.api_base = "https://api.deepseek.com/v1"
|
36 |
+
|
37 |
+
# System prompt for the AI assistant
|
38 |
+
SYSTEM_PROMPT = '''
|
39 |
+
You are Neural Analyst, an AI assistant for the Neural-Vision Enhanced analytics platform.
|
40 |
+
Your capabilities include:
|
41 |
+
1. Explaining model metrics and evaluation visualizations
|
42 |
+
2. Interpreting dataset statistics and EDA reports
|
43 |
+
3. Guiding users through app functionality
|
44 |
+
4. Providing data science insights
|
45 |
+
5. Comparing different model performances
|
46 |
+
Always consider:
|
47 |
+
- Current dataset statistics: {dataset_stats}
|
48 |
+
- Active problem type: {problem_type}
|
49 |
+
- Model metrics: {metrics}
|
50 |
+
- App state: {active_page}
|
51 |
+
'''
|
52 |
+
|
53 |
+
@app.route('/analyze', methods=['POST'])
def analyze():
    """Answer a user question with the DeepSeek chat model.

    Accepted JSON body (all keys other than the query are optional):

    * ``prompt`` or ``user_input`` -- the user's question (required).
    * ``context`` -- a JSON string or object with a ``current_state`` mapping
      (``active_page``, ``problem_type``, ``target``, ``dataset_stats``,
      ``model_metrics``).
    * ``app_mode`` -- used as the active page when ``context`` gives none.
    * ``dataset_text`` -- plain-text dataset summary appended to the prompt.

    Returns:
        200 with ``{"analysis": ..., "response": ...}`` (same text under both
        keys, so clients reading either key work), 400 with ``{"error": ...}``
        for a malformed request, or 500 with ``{"error": ...}`` when the model
        call fails.
    """
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    # The Streamlit client in this file posts "user_input"; "prompt" is the
    # key this endpoint originally required.
    user_query = data.get('prompt') or data.get('user_input')
    if not user_query:
        return jsonify({"error": "Missing 'prompt' or 'user_input'"}), 400

    raw_context = data.get('context') or {}
    if isinstance(raw_context, str):
        try:
            raw_context = json.loads(raw_context)
        except ValueError:
            return jsonify({"error": "'context' is not valid JSON"}), 400
    if not isinstance(raw_context, dict):
        return jsonify({"error": "'context' must be a JSON object"}), 400

    state = raw_context.get('current_state')
    if not isinstance(state, dict):
        state = {}

    active_page = state.get('active_page') or data.get('app_mode')
    problem_type = state.get('problem_type')
    target = state.get('target')
    metrics = state.get('model_metrics') or {}
    dataset_stats = state.get('dataset_stats')
    if not isinstance(dataset_stats, dict):
        dataset_stats = {}
    dataset_text = data.get('dataset_text') or ""

    try:
        # Construct the prompt for DeepSeek
        prompt_lines = [
            f"User Query: {user_query}",
            "",
            "Current Context:",
            f"- Active Page: {active_page}",
            f"- Problem Type: {problem_type}",
            f"- Target Variable: {target}",
            f"- Dataset Shape: {dataset_stats.get('rows', 0)} rows, "
            f"{dataset_stats.get('columns', 0)} columns",
            f"- Model Metrics: {json.dumps(metrics)}",
        ]
        if dataset_text:
            prompt_lines.append(f"- Dataset Summary:\n{dataset_text}")
        prompt = "\n".join(prompt_lines)

        # Fill the placeholders by name. Expanding current_state directly
        # raised KeyError because the state key is "model_metrics" while the
        # template placeholder is "metrics".
        system_prompt = SYSTEM_PROMPT.format(
            dataset_stats=dataset_stats,
            problem_type=problem_type,
            metrics=metrics,
            active_page=active_page,
        )

        # Call DeepSeek API
        # NOTE(review): ChatCompletion is the pre-1.0 `openai` interface --
        # confirm the pinned package version still provides it.
        response = openai.ChatCompletion.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt},
            ],
            temperature=0.3,
            max_tokens=500
        )
        answer = response.choices[0].message.content
    except Exception as e:
        # Service boundary: report the failure to the client rather than crash.
        return jsonify({"error": str(e)}), 500

    return jsonify({"analysis": answer, "response": answer})
|
90 |
|
91 |
+
# Streamlit app
|
92 |
+
def run_streamlit_app():
|
93 |
+
# Flask server URL
|
94 |
+
FLASK_URL = "http://localhost:5000/analyze"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
+
# Helper Functions
|
97 |
+
def enhance_section_title(title):
    """Render *title* as an ``<h2>`` heading with a thin bottom border."""
    heading_html = (
        "<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>"
        f"{title}</h2>"
    )
    st.markdown(heading_html, unsafe_allow_html=True)
|
99 |
+
|
100 |
+
def convert_csv_to_json_and_text(df):
    """Return a plain-text summary of *df*.

    The summary starts with the row/column counts and the total number of
    missing values, followed by one line per column:

    * numeric columns: mean (two decimals), min and max;
    * other columns: number of unique values and the most frequent value
      (``N/A`` when the column has no mode);

    each followed by that column's missing-value count.

    The name is kept for existing callers. The JSON round trip the function
    used to perform was never used in the result and has been removed.

    Args:
        df: pandas DataFrame to describe.

    Returns:
        str: newline-terminated, multi-line summary.
    """
    lines = [
        f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns",
        f"Missing Values: {df.isna().sum().sum()}",
        "Columns:",
    ]
    for col in df.columns:
        series = df[col]
        if pd.api.types.is_numeric_dtype(series):
            details = f"Mean={series.mean():.2f}, Min={series.min()}, Max={series.max()}"
        else:
            # Compute the mode once; it is empty when every value is missing.
            modes = series.mode()
            top = modes.iloc[0] if not modes.empty else 'N/A'
            details = f"Unique={series.nunique()}, Top={top}"
        lines.append(f"- {col} ({series.dtype}): {details}, Missing={series.isna().sum()}")
    return "\n".join(lines) + "\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
+
def get_chatbot_response(user_input, app_mode, dataset_text=""):
    """Ask the Flask ``/analyze`` endpoint for a chatbot reply.

    Args:
        user_input: The user's question.
        app_mode: Name of the page the user is currently on.
        dataset_text: Optional plain-text summary of the loaded dataset.

    Returns:
        str: The reply text, or a message starting with ``"Error:"`` when the
        request fails or the server returns no usable reply.
    """
    payload = {
        "user_input": user_input,
        "app_mode": app_mode,
        "dataset_text": dataset_text
    }
    try:
        # Without a timeout a stalled server would hang the app indefinitely.
        response = requests.post(FLASK_URL, json=payload, timeout=60)
        response.raise_for_status()
        body = response.json()
    except requests.exceptions.RequestException as e:
        return f"Error: Could not connect to Flask server. {str(e)}"
    except ValueError as e:
        # Body was not JSON (some `requests` versions raise plain ValueError).
        return f"Error: Could not connect to Flask server. {str(e)}"

    if not isinstance(body, dict):
        return "Error: No response from server"
    # The endpoint in this file returns its text under "analysis"; accept
    # either key so a successful call is not reported as an error.
    return body.get("response") or body.get("analysis") or "Error: No response from server"
|
130 |
+
|
131 |
+
# Sidebar Navigation
# NOTE(review): nesting under `with st.sidebar` was reconstructed from a
# view without indentation -- confirm against the file.
with st.sidebar:
    st.title("🔮 Data-Vision Pro")
    st.markdown("Your AI-powered data analysis suite.")
    st.markdown("---")

    # One hint per page; the selected page's hint is shown under the selector.
    page_hints = {
        "Data Upload": "⬆️ Upload your CSV or XLSX dataset to begin.",
        "Data Cleaning": "🧹 Clean and preprocess your data using various tools.",
        "EDA": "🔍 Explore your data visually and statistically.",
    }
    app_mode = st.selectbox(
        "Navigation",
        ["Data Upload", "Data Cleaning", "EDA"],
        format_func=lambda x: f"📌 {x}"
    )
    hint = page_hints.get(app_mode)
    if hint is not None:
        st.info(hint)

    st.markdown("---")
    st.markdown("**Note**: Requires `ydata-profiling`, `requests`, `flask`. Install via `pip install ydata-profiling requests flask`.")

    # Offer the cleaned dataset for download once one exists in the session.
    if 'cleaned_data' in st.session_state:
        cleaned_csv = st.session_state.cleaned_data.to_csv(index=False)
        st.download_button(
            label="Download Cleaned Data as CSV",
            data=cleaned_csv,
            file_name='cleaned_data.csv',
            mime='text/csv',
        )
    st.markdown("Created by Calvin Allen-Crawford")
    st.markdown("v1.0 | © 2025")
|
160 |
+
|
161 |
+
# Main App Pages
# Render the page selected in the sidebar. Pages share data through
# st.session_state: the upload page stores the raw frame, the cleaning page
# works on a copy of it, and the EDA page reads the cleaned copy.
# NOTE(review): block nesting was reconstructed from a view without
# indentation -- confirm against the file.
if app_mode == "Data Upload":
    st.title("📤 Data Upload & Analysis")
    uploaded_file = st.file_uploader("Upload Dataset", type=["csv"])

    if uploaded_file:
        try:
            df = pd.read_csv(uploaded_file)
            st.session_state.df = df
            st.session_state.metrics = {}

            # The cleaning page requires `raw_data` and the chatbot reads
            # `dataset_text`; neither was set on upload before, so cleaning
            # was unreachable. Only reset when a different file arrives, so
            # returning to this page does not discard cleaning work.
            upload_signature = (uploaded_file.name, uploaded_file.size)
            if st.session_state.get("upload_signature") != upload_signature:
                st.session_state.upload_signature = upload_signature
                st.session_state.raw_data = df.copy()
                # Drop state derived from the previous dataset.
                for stale_key in ("cleaned_data", "data_versions"):
                    if stale_key in st.session_state:
                        del st.session_state[stale_key]
                st.session_state.dataset_text = convert_csv_to_json_and_text(df)

            st.subheader("Dataset Health Check")
            col1, col2, col3 = st.columns(3)
            col1.metric("Total Samples", df.shape[0])
            col2.metric("Features", df.shape[1])
            col3.metric("Missing Values", df.isna().sum().sum())

            if st.button("Generate Full Profile Report"):
                with st.spinner("Generating report..."):
                    pr = ProfileReport(df, explorative=True)
                    st_profile_report(pr)
        except Exception as e:
            # UI boundary: show the problem instead of a traceback.
            st.error(f"Error reading the file: {str(e)}")

elif app_mode == "Data Cleaning":
    st.title("🧹 Smart Data Cleaning")
    st.header("Preprocess and Transform Your Data")
    if 'raw_data' not in st.session_state:
        st.warning("Please upload data first in the Data Upload section.")
        st.stop()
    if 'cleaned_data' not in st.session_state:
        st.session_state.cleaned_data = st.session_state.raw_data.copy()
    df = st.session_state.cleaned_data.copy()

    enhance_section_title("📊 Data Health Dashboard")
    with st.expander("Explore Data Health Metrics", expanded=True):
        col1, col2, col3 = st.columns(3)
        col1.metric("Columns", len(df.columns))
        col2.metric("Rows", len(df))
        col3.metric("Missing Values", df.isna().sum().sum())
        if st.button("Generate Detailed Health Report"):
            with st.spinner("Generating report..."):
                profile = ProfileReport(df, minimal=True)
                st_profile_report(profile)
        # Undo is offered only when an earlier version exists to return to.
        if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
            if st.button("Undo Last Action"):
                st.session_state.data_versions.pop()
                st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
                st.session_state.dataset_text = convert_csv_to_json_and_text(st.session_state.cleaned_data)
                st.rerun()

elif app_mode == "EDA":
    st.title("🔍 Interactive Data Explorer")
    if 'cleaned_data' not in st.session_state:
        st.warning("Please upload and clean data first.")
        st.stop()
    df = st.session_state.cleaned_data.copy()

    enhance_section_title("Dataset Overview")
    with st.container():
        col1, col2, col3, col4 = st.columns(4)
        col1.metric("Total Rows", df.shape[0])
        col2.metric("Total Columns", df.shape[1])
        missing_total = df.isna().sum().sum()
        # An empty frame has size 0; report 0% rather than dividing by zero.
        missing_percentage = missing_total / df.size * 100 if df.size else 0.0
        col3.metric("Missing Values", f"{missing_total} ({missing_percentage:.1f}%)")
        col4.metric("Duplicates", df.duplicated().sum())
|
227 |
+
|
228 |
+
# Chatbot Section
# NOTE(review): block nesting was reconstructed from a view without
# indentation -- confirm against the file.
st.markdown("---")
st.subheader("💬 AI Chatbot Assistant")
st.info("Ask me about the app or your data! Try: 'What can I do here?' or 'What's in the dataset?'")
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Replay the stored conversation so it is visible after each rerun.
for entry in st.session_state.chat_history:
    st.chat_message(entry["role"]).markdown(entry["content"])

user_input = st.chat_input("Ask me anything about the app or your data...")
if user_input:
    st.session_state.chat_history.append({"role": "user", "content": user_input})
    st.chat_message("user").markdown(user_input)

    with st.spinner("Thinking..."):
        # Pass the stored dataset summary along when one exists.
        dataset_text = st.session_state.get("dataset_text", "")
        response = get_chatbot_response(user_input, app_mode, dataset_text)
        st.session_state.chat_history.append({"role": "assistant", "content": response})
    st.chat_message("assistant").markdown(response)
|
251 |
+
|
252 |
+
if __name__ == '__main__':
    # Run the Flask server in a background thread, then render the Streamlit
    # UI in the current thread.
    import threading

    _FLASK_THREAD_NAME = "flask-analyze-server"

    # This block can run many times in one process (Streamlit re-executes the
    # script on each interaction). Start the server only if no thread with
    # this name is already alive; a second server would fail to bind port 5000.
    server_running = any(
        thread.name == _FLASK_THREAD_NAME for thread in threading.enumerate()
    )
    if not server_running:
        # NOTE(review): host 0.0.0.0 listens on all interfaces although the
        # client in this file only connects via localhost -- confirm intended.
        flask_thread = threading.Thread(
            target=lambda: app.run(host='0.0.0.0', port=5000),
            name=_FLASK_THREAD_NAME,
            daemon=True,  # do not keep the process alive after the UI exits
        )
        flask_thread.start()

    # Run Streamlit app
    run_streamlit_app()
|