Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import plotly.express as px
|
5 |
-
|
6 |
from ydata_profiling import ProfileReport
|
7 |
from streamlit_pandas_profiling import st_profile_report
|
8 |
import os
|
@@ -14,11 +14,10 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
14 |
from langchain.embeddings import HuggingFaceEmbeddings
|
15 |
import re
|
16 |
from scipy import stats
|
17 |
-
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
18 |
import tempfile
|
19 |
-
import json
|
20 |
|
21 |
-
# Set page config
|
22 |
st.set_page_config(page_title="Data-Vision Pro", layout="wide")
|
23 |
|
24 |
# Load environment variables
|
@@ -27,160 +26,172 @@ load_dotenv()
|
|
27 |
# Initialize Groq client
|
28 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
29 |
|
30 |
-
# Initialize HuggingFace embeddings
|
31 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
32 |
|
33 |
-
# Custom
|
34 |
-
|
35 |
-
<style>
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
def update_cleaned_data(df):
|
178 |
st.session_state.cleaned_data = df
|
179 |
if 'data_versions' not in st.session_state:
|
180 |
st.session_state.data_versions = [st.session_state.raw_data.copy()]
|
181 |
st.session_state.data_versions.append(df.copy())
|
182 |
st.session_state.dataset_text = convert_df_to_text(df)
|
183 |
-
|
|
|
184 |
|
185 |
def convert_df_to_text(df):
|
186 |
text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
|
@@ -227,33 +238,61 @@ def extract_plot_data(plot_info, df):
|
|
227 |
x_col = plot_info["x"]
|
228 |
y_col = plot_info["y"] if "y" in plot_info else None
|
229 |
data = pd.read_json(plot_info["data"])
|
230 |
-
plot_text = f"Plot Type: {plot_type}\
|
|
|
231 |
if y_col:
|
232 |
plot_text += f"Y-Axis: {y_col}\n"
|
233 |
if plot_type == "Scatter Plot" and y_col:
|
234 |
correlation = data[x_col].corr(data[y_col])
|
235 |
slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col].dropna(), data[y_col].dropna())
|
236 |
-
plot_text += f"Correlation: {correlation:.2f}\
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
237 |
return plot_text
|
238 |
|
239 |
def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
|
240 |
system_prompt = (
|
241 |
-
|
242 |
-
"
|
243 |
-
"- Data
|
244 |
-
"-
|
245 |
-
"
|
|
|
246 |
)
|
247 |
context = ""
|
248 |
if vector_store:
|
249 |
docs = vector_store.similarity_search(user_input, k=3)
|
250 |
if docs:
|
251 |
-
context = "\n\
|
|
|
|
|
|
|
252 |
try:
|
253 |
response = client.chat.completions.create(
|
254 |
model=model,
|
255 |
messages=[
|
256 |
-
{"role": "system", "content": system_prompt
|
257 |
{"role": "user", "content": user_input}
|
258 |
],
|
259 |
temperature=0.7,
|
@@ -263,164 +302,379 @@ def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-
|
|
263 |
except Exception as e:
|
264 |
return f"Error: {str(e)}"
|
265 |
|
266 |
-
|
267 |
-
|
268 |
-
if
|
269 |
-
|
270 |
-
|
271 |
-
if
|
272 |
-
|
|
|
273 |
update_cleaned_data(df)
|
274 |
-
return
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
if match:
|
279 |
-
x, y = match.group(1).strip(), match.group(2).strip()
|
280 |
-
if x in df.columns and y in df.columns:
|
281 |
-
fig = px.scatter(df, x=x, y=y)
|
282 |
-
plot_info = {"type": "Scatter Plot", "x": x, "y": y, "data": df[[x, y]].to_json()}
|
283 |
-
return df, fig, plot_info
|
284 |
-
return df, None, "Invalid scatter plot command."
|
285 |
-
elif "histogram of" in command:
|
286 |
-
col = command.replace("histogram of", "").strip()
|
287 |
-
if col in df.columns:
|
288 |
-
fig = px.histogram(df, x=col)
|
289 |
-
plot_info = {"type": "Histogram", "x": col, "data": df[[col]].to_json()}
|
290 |
-
return df, fig, plot_info
|
291 |
-
return df, None, "Invalid histogram command."
|
292 |
-
elif "analyze plot" in command and "last_plot" in st.session_state:
|
293 |
-
plot_info = st.session_state.last_plot
|
294 |
-
plot_text = extract_plot_data(plot_info, df)
|
295 |
-
st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
|
296 |
-
return df, plot_text
|
297 |
-
return df, None, None
|
298 |
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
303 |
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
|
|
309 |
|
310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
311 |
with st.sidebar:
|
312 |
st.markdown("### 🔮 Data-Vision Pro")
|
313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
314 |
if 'cleaned_data' in st.session_state:
|
315 |
csv = st.session_state.cleaned_data.to_csv(index=False)
|
316 |
-
st.download_button(
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
# Initialize Session State
|
319 |
if 'vector_store' not in st.session_state:
|
320 |
st.session_state.vector_store = None
|
321 |
if 'chat_history' not in st.session_state:
|
322 |
st.session_state.chat_history = []
|
323 |
-
|
324 |
-
#
|
325 |
-
|
326 |
-
|
|
|
|
|
|
|
|
|
327 |
if uploaded_file:
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
|
344 |
-
|
345 |
-
|
346 |
-
|
347 |
-
|
348 |
-
with st.
|
349 |
-
|
350 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
|
352 |
-
elif
|
353 |
-
|
354 |
-
|
|
|
|
|
|
|
|
|
355 |
else:
|
356 |
-
|
357 |
-
|
358 |
-
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
|
363 |
-
""
|
364 |
-
st.
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
372 |
if 'cleaned_data' not in st.session_state:
|
373 |
-
st.warning("Please upload data first.")
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
424 |
|
425 |
if __name__ == "__main__":
|
426 |
main()
|
|
|
2 |
import pandas as pd
|
3 |
import numpy as np
|
4 |
import plotly.express as px
|
5 |
+
import plotly.graph_objects as go
|
6 |
from ydata_profiling import ProfileReport
|
7 |
from streamlit_pandas_profiling import st_profile_report
|
8 |
import os
|
|
|
14 |
from langchain.embeddings import HuggingFaceEmbeddings
|
15 |
import re
|
16 |
from scipy import stats
|
17 |
+
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
|
18 |
import tempfile
|
|
|
19 |
|
20 |
+
# Set page config as the first Streamlit command
|
21 |
st.set_page_config(page_title="Data-Vision Pro", layout="wide")
|
22 |
|
23 |
# Load environment variables
|
|
|
26 |
# Initialize Groq client
|
27 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
28 |
|
29 |
+
# Initialize HuggingFace embeddings for FAISS
|
30 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
31 |
|
32 |
+
# Custom CSS with Modernized Silver, Blue, and Gold Theme + Responsiveness
|
33 |
+
st.markdown("""
|
34 |
+
<style>
|
35 |
+
:root {
|
36 |
+
--silver-light: #D8D8D8;
|
37 |
+
--silver-dark: #B8B8B8;
|
38 |
+
--blue: #5C89BC;
|
39 |
+
--blue-dark: #4E73A0;
|
40 |
+
--blue-light: #6EA8E0;
|
41 |
+
--gold: #A87E01;
|
42 |
+
--text-color: #333333;
|
43 |
+
--shadow-color: rgba(0,0,0,0.1);
|
44 |
+
--shadow-color-stronger: rgba(0,0,0,0.2);
|
45 |
+
}
|
46 |
+
.stApp {
|
47 |
+
background: linear-gradient(135deg, var(--silver-light) 0%, var(--silver-dark) 100%);
|
48 |
+
font-family: 'Inter', sans-serif;
|
49 |
+
max-width: 900px;
|
50 |
+
margin: 0 auto;
|
51 |
+
padding: 10px;
|
52 |
+
transition: all 0.3s ease;
|
53 |
+
}
|
54 |
+
.header {
|
55 |
+
background: linear-gradient(90deg, var(--blue) 80%, var(--blue-dark) 100%);
|
56 |
+
color: white;
|
57 |
+
padding: 20px;
|
58 |
+
border-radius: 16px 16px 0 0;
|
59 |
+
box-shadow: 0 4px 12px var(--shadow-color);
|
60 |
+
text-align: center;
|
61 |
+
transition: transform 0.2s ease;
|
62 |
+
}
|
63 |
+
.header:hover {
|
64 |
+
transform: translateY(-2px);
|
65 |
+
box-shadow: 0 4px 12px var(--shadow-color-stronger);
|
66 |
+
}
|
67 |
+
.header-title {
|
68 |
+
font-size: 1.5rem;
|
69 |
+
font-weight: 700;
|
70 |
+
margin: 0;
|
71 |
+
}
|
72 |
+
.header-subtitle {
|
73 |
+
font-size: 0.9rem;
|
74 |
+
margin-top: 8px;
|
75 |
+
opacity: 0.9;
|
76 |
+
}
|
77 |
+
.sidebar .sidebar-content {
|
78 |
+
background-color: white;
|
79 |
+
border-radius: 16px;
|
80 |
+
box-shadow: 0 6px 16px var(--shadow-color);
|
81 |
+
padding: 20px;
|
82 |
+
transition: box-shadow 0.3s ease;
|
83 |
+
}
|
84 |
+
.sidebar .sidebar-content:hover {
|
85 |
+
box-shadow: 0 8px 20px var(--shadow-color-stronger);
|
86 |
+
}
|
87 |
+
.chat-container {
|
88 |
+
background-color: white;
|
89 |
+
border-radius: 16px;
|
90 |
+
box-shadow: 0 6px 16px var(--shadow-color);
|
91 |
+
padding: 20px;
|
92 |
+
margin-top: 25px;
|
93 |
+
transition: box-shadow 0.3s ease;
|
94 |
+
}
|
95 |
+
.chat-container:hover {
|
96 |
+
box-shadow: 0 8px 20px var(--shadow-color-stronger);
|
97 |
+
}
|
98 |
+
.user-message {
|
99 |
+
background: linear-gradient(45deg, var(--blue), var(--blue-light));
|
100 |
+
color: white;
|
101 |
+
border-radius: 20px 20px 6px 20px;
|
102 |
+
padding: 14px 18px;
|
103 |
+
margin-left: auto;
|
104 |
+
max-width: 80%;
|
105 |
+
margin-bottom: 12px;
|
106 |
+
box-shadow: 0 2px 8px var(--blue-dark);
|
107 |
+
transition: transform 0.2s ease;
|
108 |
+
}
|
109 |
+
.user-message:hover {
|
110 |
+
transform: scale(1.02);
|
111 |
+
}
|
112 |
+
.bot-message {
|
113 |
+
background-color: #F0F0F0;
|
114 |
+
color: var(--text-color);
|
115 |
+
border-radius: 20px 20px 20px 6px;
|
116 |
+
padding: 14px 18px;
|
117 |
+
margin-right: auto;
|
118 |
+
max-width: 80%;
|
119 |
+
margin-bottom: 12px;
|
120 |
+
box-shadow: 0 2px 8px var(--shadow-color);
|
121 |
+
transition: transform 0.2s ease;
|
122 |
+
}
|
123 |
+
.bot-message:hover {
|
124 |
+
transform: scale(1.02);
|
125 |
+
}
|
126 |
+
.footer {
|
127 |
+
text-align: center;
|
128 |
+
margin-top: 20px;
|
129 |
+
color: var(--text-color);
|
130 |
+
font-size: 0.8rem;
|
131 |
+
}
|
132 |
+
.tech-badge {
|
133 |
+
display: inline-block;
|
134 |
+
background-color: #E6ECEF;
|
135 |
+
color: var(--blue);
|
136 |
+
padding: 4px 8px;
|
137 |
+
border-radius: 12px;
|
138 |
+
font-size: 0.7rem;
|
139 |
+
margin: 0 4px;
|
140 |
+
}
|
141 |
+
h2 {
|
142 |
+
color: var(--blue);
|
143 |
+
border-bottom: 2px solid var(--gold);
|
144 |
+
padding-bottom: 5px;
|
145 |
+
font-size: 1.5rem;
|
146 |
+
font-weight: 700;
|
147 |
+
}
|
148 |
+
.stButton > button {
|
149 |
+
background-color: var(--gold);
|
150 |
+
color: white;
|
151 |
+
border-radius: 12px;
|
152 |
+
padding: 10px 20px;
|
153 |
+
border: none;
|
154 |
+
box-shadow: 0 4px 12px var(--shadow-color);
|
155 |
+
font-weight: 600;
|
156 |
+
transition: all 0.3s ease;
|
157 |
+
}
|
158 |
+
.stButton > button:hover {
|
159 |
+
background-color: #8C6B01;
|
160 |
+
transform: translateY(-2px);
|
161 |
+
box-shadow: 0 6px 16px var(--shadow-color-stronger);
|
162 |
+
}
|
163 |
+
@media (max-width: 768px) {
|
164 |
+
.header-title {
|
165 |
+
font-size: 1.2rem;
|
166 |
+
}
|
167 |
+
.header-subtitle {
|
168 |
+
font-size: 0.8rem;
|
169 |
+
}
|
170 |
+
.chat-container, .sidebar .sidebar-content {
|
171 |
+
padding: 10px;
|
172 |
+
}
|
173 |
+
.stApp {
|
174 |
+
padding: 5px;
|
175 |
+
}
|
176 |
+
h2 {
|
177 |
+
font-size: 1.2rem;
|
178 |
+
}
|
179 |
+
}
|
180 |
+
</style>
|
181 |
+
""", unsafe_allow_html=True)
|
182 |
+
|
183 |
+
# Helper Functions
|
184 |
+
def enhance_section_title(title):
    """Render ``title`` as a styled ``<h2>`` section heading.

    The heading is written through ``st.markdown`` with raw HTML enabled.
    ``title`` is interpolated into the markup unchanged (no escaping).
    """
    heading_html = f"<h2 style='border-bottom: 2px solid var(--gold); padding-bottom: 5px; color: var(--blue);'>{title}</h2>"
    st.markdown(heading_html, unsafe_allow_html=True)
|
186 |
+
|
187 |
def update_cleaned_data(df):
    """Store ``df`` as the current cleaned dataset and refresh derived state.

    Updates, in order: ``cleaned_data``, the ``data_versions`` history
    (seeded from ``raw_data`` on first use), the ``dataset_text`` summary and
    the ``vector_store`` built from that summary, then triggers a rerun.

    Args:
        df: The DataFrame produced by the cleaning step just applied.
    """
    st.session_state.cleaned_data = df
    if 'data_versions' not in st.session_state:
        st.session_state.data_versions = [st.session_state.raw_data.copy()]
    # Snapshot a copy so later in-place edits cannot alter the history.
    st.session_state.data_versions.append(df.copy())
    st.session_state.dataset_text = convert_df_to_text(df)
    # Rebuild the retrieval index from the new summary; the upload and undo
    # paths do the same, and without it the chatbot keeps answering from the
    # pre-cleaning dataset.
    st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
    # NOTE(review): st.rerun() stops this run immediately, so the message
    # below may not stay visible — confirm.
    st.success("✅ Action completed successfully!")
    st.rerun()
|
195 |
|
196 |
def convert_df_to_text(df):
|
197 |
text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
|
|
|
238 |
x_col = plot_info["x"]
|
239 |
y_col = plot_info["y"] if "y" in plot_info else None
|
240 |
data = pd.read_json(plot_info["data"])
|
241 |
+
plot_text = f"Plot Type: {plot_type}\n"
|
242 |
+
plot_text += f"X-Axis: {x_col}\n"
|
243 |
if y_col:
|
244 |
plot_text += f"Y-Axis: {y_col}\n"
|
245 |
if plot_type == "Scatter Plot" and y_col:
|
246 |
correlation = data[x_col].corr(data[y_col])
|
247 |
slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col].dropna(), data[y_col].dropna())
|
248 |
+
plot_text += f"Correlation: {correlation:.2f}\n"
|
249 |
+
plot_text += f"Linear Regression: Slope={slope:.2f}, Intercept={intercept:.2f}, R²={r_value**2:.2f}, p-value={p_value:.4f}\n"
|
250 |
+
plot_text += f"X Stats: Mean={data[x_col].mean():.2f}, Std={data[x_col].std():.2f}, Min={data[x_col].min():.2f}, Max={data[x_col].max():.2f}\n"
|
251 |
+
plot_text += f"Y Stats: Mean={data[y_col].mean():.2f}, Std={data[y_col].std():.2f}, Min={data[y_col].min():.2f}, Max={data[y_col].max():.2f}\n"
|
252 |
+
elif plot_type == "Histogram":
|
253 |
+
plot_text += f"Stats: Mean={data[x_col].mean():.2f}, Median={data[x_col].median():.2f}, Std={data[x_col].std():.2f}\n"
|
254 |
+
plot_text += f"Skewness: {data[x_col].skew():.2f}\n"
|
255 |
+
plot_text += f"Range: [{data[x_col].min():.2f}, {data[x_col].max():.2f}]\n"
|
256 |
+
elif plot_type == "Box Plot" and y_col:
|
257 |
+
q1, q3 = data[y_col].quantile(0.25), data[y_col].quantile(0.75)
|
258 |
+
iqr = q3 - q1
|
259 |
+
plot_text += f"Y Stats: Median={data[y_col].median():.2f}, Q1={q1:.2f}, Q3={q3:.2f}, IQR={iqr:.2f}\n"
|
260 |
+
plot_text += f"Outliers: {len(data[y_col][(data[y_col] < q1 - 1.5 * iqr) | (data[y_col] > q3 + 1.5 * iqr)])} potential outliers\n"
|
261 |
+
elif plot_type == "Line Chart" and y_col:
|
262 |
+
plot_text += f"Y Stats: Mean={data[y_col].mean():.2f}, Std={data[y_col].std():.2f}, Trend={'increasing' if data[y_col].iloc[-1] > data[y_col].iloc[0] else 'decreasing'}\n"
|
263 |
+
elif plot_type == "Bar Chart":
|
264 |
+
plot_text += f"Counts: {data[x_col].value_counts().to_dict()}\n"
|
265 |
+
elif plot_type == "Correlation Matrix":
|
266 |
+
corr = data.corr()
|
267 |
+
plot_text += "Correlation Matrix:\n"
|
268 |
+
for col1 in corr.columns:
|
269 |
+
for col2 in corr.index:
|
270 |
+
if col1 < col2:
|
271 |
+
plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
|
272 |
return plot_text
|
273 |
|
274 |
def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
|
275 |
system_prompt = (
|
276 |
+
"You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
|
277 |
+
f"The user is on the '{app_mode}' page:\n"
|
278 |
+
"- **Data Upload**: Upload CSV/XLSX files, view stats, or generate reports.\n"
|
279 |
+
"- **Data Cleaning**: Clean data (e.g., handle missing values, encode variables).\n"
|
280 |
+
"- **EDA**: Visualize data (e.g., scatter plots, histograms) and analyze plots.\n"
|
281 |
+
"When analyzing plots, provide detailed insights based on numerical data extracted from them."
|
282 |
)
|
283 |
context = ""
|
284 |
if vector_store:
|
285 |
docs = vector_store.similarity_search(user_input, k=3)
|
286 |
if docs:
|
287 |
+
context = "\n\nDataset and Plot Context:\n" + "\n".join([f"- {doc.page_content}" for doc in docs])
|
288 |
+
system_prompt += f"Use this dataset and plot context to augment your response:\n{context}"
|
289 |
+
else:
|
290 |
+
system_prompt += "No dataset or plot data is loaded. Assist based on app functionality."
|
291 |
try:
|
292 |
response = client.chat.completions.create(
|
293 |
model=model,
|
294 |
messages=[
|
295 |
+
{"role": "system", "content": system_prompt},
|
296 |
{"role": "user", "content": user_input}
|
297 |
],
|
298 |
temperature=0.7,
|
|
|
302 |
except Exception as e:
|
303 |
return f"Error: {str(e)}"
|
304 |
|
305 |
+
# Command Functions
|
306 |
+
def drop_columns(columns):
    """Drop the comma-separated ``columns`` from the cleaned dataset.

    Names are stripped of surrounding whitespace; names that are not columns
    of the dataset are ignored. The result is handed to
    ``update_cleaned_data``.

    Returns:
        A status message describing what was dropped, or why nothing was.
    """
    if 'cleaned_data' not in st.session_state:
        return "No dataset loaded."

    working = st.session_state.cleaned_data.copy()
    requested = [name.strip() for name in columns.split(',')]
    present = [name for name in requested if name in working.columns]
    if not present:
        return "No valid columns found to drop."

    working.drop(present, axis=1, inplace=True)
    update_cleaned_data(working)
    return f"Dropped columns: {', '.join(present)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
+
def generate_scatter_plot(params):
    """Draw a scatter plot described by a ``"<x> vs <y>"`` string.

    On success the figure is rendered with ``st.plotly_chart`` and a
    description of the plot (type, axes, JSON of the two columns) is stored
    in ``st.session_state.last_plot``.

    Args:
        params: Text containing two column names separated by ``vs``.

    Returns:
        A status message: success, invalid columns, or no dataset loaded.
    """
    # Same guard as drop_columns: without it a missing dataset raises
    # AttributeError instead of returning a message.
    if 'cleaned_data' not in st.session_state:
        return "No dataset loaded."
    df = st.session_state.cleaned_data
    match = re.search(r"([\w\s]+)\s+vs\s+([\w\s]+)", params)
    if match:
        x_axis, y_axis = match.group(1).strip(), match.group(2).strip()
        if x_axis in df.columns and y_axis in df.columns:
            fig = px.scatter(df, x=x_axis, y=y_axis, title=f'Scatter Plot of {x_axis} vs {y_axis}')
            st.plotly_chart(fig)
            st.session_state.last_plot = {"type": "Scatter Plot", "x": x_axis, "y": y_axis, "data": df[[x_axis, y_axis]].to_json()}
            return f"Generated scatter plot of {x_axis} vs {y_axis}"
    return "Invalid columns for scatter plot."
|
330 |
+
|
331 |
+
def generate_histogram(params):
    """Draw a histogram of the column named by ``params``.

    On success the figure is rendered with ``st.plotly_chart`` and a
    description of the plot (type, x column, JSON of that column) is stored
    in ``st.session_state.last_plot``.

    Args:
        params: Column name; surrounding whitespace is ignored.

    Returns:
        A status message: success, invalid column, or no dataset loaded.
    """
    # Same guard as drop_columns: without it a missing dataset raises
    # AttributeError instead of returning a message.
    if 'cleaned_data' not in st.session_state:
        return "No dataset loaded."
    df = st.session_state.cleaned_data
    x_axis = params.strip()
    if x_axis in df.columns:
        fig = px.histogram(df, x=x_axis, title=f'Histogram of {x_axis}')
        st.plotly_chart(fig)
        st.session_state.last_plot = {"type": "Histogram", "x": x_axis, "data": df[[x_axis]].to_json()}
        return f"Generated histogram of {x_axis}"
    return "Invalid column for histogram."
|
340 |
+
|
341 |
+
def analyze_plot():
    """Return a textual analysis of the most recently generated plot.

    Reads ``st.session_state.last_plot``, rebuilds a DataFrame from its JSON
    ``"data"`` entry and passes both to ``extract_plot_data``.

    Returns:
        The analysis text, or a message when no plot has been stored yet.
    """
    from io import StringIO

    if "last_plot" not in st.session_state:
        return "No plot available to analyze."
    plot_info = st.session_state.last_plot
    # Wrap the JSON text in a file-like object: passing a literal JSON string
    # to read_json is deprecated in recent pandas releases.
    df = pd.read_json(StringIO(plot_info["data"]))
    plot_text = extract_plot_data(plot_info, df)
    return f"Analysis of the last plot:\n{plot_text}"
|
348 |
+
|
349 |
+
def parse_command(command):
    """Map a free-text command to a handler and its argument string.

    Trigger phrases are matched case-insensitively, but the argument text
    keeps the user's original casing so that column names such as ``Age``
    still match the dataset's columns downstream.

    Args:
        command: Raw text typed by the user.

    Returns:
        ``(handler, params)`` for a recognised command (``params`` is ``None``
        for "analyze plot"), or ``(None, lowered_command)`` when nothing
        matches.
    """
    text = command.strip()
    lowered = text.lower()

    def _without(pattern):
        # Remove every occurrence of the trigger phrase, ignoring case.
        return re.sub(pattern, "", text, flags=re.IGNORECASE).strip()

    if "drop column" in lowered:
        return drop_columns, _without(r"drop columns?")
    if "show a scatter plot" in lowered or "scatter plot of" in lowered:
        # "of" is optional after "show a scatter plot", so the phrase is
        # removed even when the user omits it.
        return generate_scatter_plot, _without(r"show a scatter plot(?: of)?|scatter plot of")
    if "show a histogram" in lowered or "histogram of" in lowered:
        return generate_histogram, _without(r"show a histogram(?: of)?|histogram of")
    if "analyze plot" in lowered:
        # Handlers are called with one argument; analyze_plot takes none.
        return lambda x: analyze_plot(), None
    return None, lowered
|
363 |
|
364 |
+
# Dataset Preview Function
|
365 |
+
def display_dataset_preview():
    """Show the first ten rows of the cleaned dataset, if one is loaded.

    Renders a subheader, the preview table and a horizontal rule; does
    nothing when ``cleaned_data`` is absent from the session state.
    """
    if 'cleaned_data' not in st.session_state:
        return
    st.subheader("Current Dataset Preview")
    preview_rows = st.session_state.cleaned_data.head(10)
    st.dataframe(preview_rows, use_container_width=True)
    st.markdown("---")
|
370 |
|
371 |
+
# Main App
|
372 |
+
def main():
|
373 |
+
# Header
|
374 |
+
st.markdown("""
|
375 |
+
<div class="header">
|
376 |
+
<h1 class="header-title">Data-Vision Pro</h1>
|
377 |
+
<div class="header-subtitle">Advanced Data Analysis with Groq Inference</div>
|
378 |
+
</div>
|
379 |
+
""", unsafe_allow_html=True)
|
380 |
+
|
381 |
+
# Sidebar Navigation
|
382 |
with st.sidebar:
|
383 |
st.markdown("### 🔮 Data-Vision Pro")
|
384 |
+
st.markdown("Your AI-powered data analysis suite with RAG.")
|
385 |
+
st.markdown("---")
|
386 |
+
app_mode = st.selectbox(
|
387 |
+
"Navigation",
|
388 |
+
["Data Upload", "Data Cleaning", "EDA"],
|
389 |
+
format_func=lambda x: f"📌 {x}"
|
390 |
+
)
|
391 |
+
model = st.selectbox(
|
392 |
+
"Select Groq Model",
|
393 |
+
["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
|
394 |
+
index=0
|
395 |
+
)
|
396 |
+
if app_mode == "Data Upload":
|
397 |
+
st.info("⬆️ Upload your CSV or XLSX dataset to begin.")
|
398 |
+
elif app_mode == "Data Cleaning":
|
399 |
+
st.info("🧹 Clean and preprocess your data.")
|
400 |
+
elif app_mode == "EDA":
|
401 |
+
st.info("🔍 Explore your data visually.")
|
402 |
+
|
403 |
if 'cleaned_data' in st.session_state:
|
404 |
csv = st.session_state.cleaned_data.to_csv(index=False)
|
405 |
+
st.download_button(
|
406 |
+
label="Download Cleaned Data",
|
407 |
+
data=csv,
|
408 |
+
file_name='cleaned_data.csv',
|
409 |
+
mime='text/csv',
|
410 |
+
)
|
411 |
+
st.markdown("---")
|
412 |
+
st.markdown("Built with <span class='tech-badge'>Streamlit</span> + <span class='tech-badge'>Groq</span>", unsafe_allow_html=True)
|
413 |
+
|
414 |
# Initialize Session State
|
415 |
if 'vector_store' not in st.session_state:
|
416 |
st.session_state.vector_store = None
|
417 |
if 'chat_history' not in st.session_state:
|
418 |
st.session_state.chat_history = []
|
419 |
+
|
420 |
+
# Display Dataset Preview
|
421 |
+
display_dataset_preview()
|
422 |
+
|
423 |
+
# App Pages
|
424 |
+
if app_mode == "Data Upload":
|
425 |
+
st.header("📤 Data Upload & Profiling")
|
426 |
+
uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"], key="file_uploader")
|
427 |
if uploaded_file:
|
428 |
+
st.session_state.pop('raw_data', None)
|
429 |
+
st.session_state.pop('cleaned_data', None)
|
430 |
+
st.session_state.pop('data_versions', None)
|
431 |
+
try:
|
432 |
+
if uploaded_file.name.endswith('.csv'):
|
433 |
+
df = pd.read_csv(uploaded_file)
|
434 |
+
else:
|
435 |
+
df = pd.read_excel(uploaded_file)
|
436 |
+
if df.empty:
|
437 |
+
st.error("Uploaded file is empty.")
|
438 |
+
st.stop()
|
439 |
+
st.session_state.raw_data = df
|
440 |
+
st.session_state.cleaned_data = df.copy()
|
441 |
+
st.session_state.dataset_text = convert_df_to_text(df)
|
442 |
+
st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
|
443 |
+
if 'data_versions' not in st.session_state:
|
444 |
+
st.session_state.data_versions = [df.copy()]
|
445 |
+
col1, col2, col3 = st.columns(3)
|
446 |
+
with col1: st.metric("Rows", df.shape[0])
|
447 |
+
with col2: st.metric("Columns", df.shape[1])
|
448 |
+
with col3: st.metric("Missing Values", df.isna().sum().sum())
|
449 |
+
if st.checkbox("Show Data Preview"):
|
450 |
+
st.dataframe(df.head(10), use_container_width=True)
|
451 |
+
if st.button("Generate Full Profile Report"):
|
452 |
+
with st.spinner("Generating report..."):
|
453 |
+
pr = ProfileReport(df, explorative=True)
|
454 |
+
st_profile_report(pr)
|
455 |
+
st.success("✅ Data loaded successfully!")
|
456 |
+
except Exception as e:
|
457 |
+
st.error(f"An error occurred: {str(e)}")
|
458 |
|
459 |
+
elif app_mode == "Data Cleaning":
|
460 |
+
st.header("🧹 Smart Data Cleaning")
|
461 |
+
if 'raw_data' not in st.session_state:
|
462 |
+
st.warning("Please upload data first in the Data Upload section.")
|
463 |
+
st.stop()
|
464 |
+
if 'cleaned_data' in st.session_state:
|
465 |
+
df = st.session_state.cleaned_data.copy()
|
466 |
else:
|
467 |
+
st.session_state.cleaned_data = st.session_state.raw_data.copy()
|
468 |
+
df = st.session_state.cleaned_data.copy()
|
469 |
+
|
470 |
+
enhance_section_title("📊 Data Health Dashboard")
|
471 |
+
with st.expander("Explore Data Health Metrics", expanded=True):
|
472 |
+
col1, col2, col3 = st.columns(3)
|
473 |
+
with col1: st.metric("Columns", len(df.columns))
|
474 |
+
with col2: st.metric("Rows", len(df))
|
475 |
+
with col3: st.metric("Missing Values", df.isna().sum().sum())
|
476 |
+
if st.button("Generate Detailed Health Report"):
|
477 |
+
with st.spinner("Generating report..."):
|
478 |
+
profile = ProfileReport(df, minimal=True)
|
479 |
+
st_profile_report(profile)
|
480 |
+
if 'data_versions' in st.session_state and len(st.session_state.data_versions) > 1:
|
481 |
+
if st.button("Undo Last Action"):
|
482 |
+
st.session_state.data_versions.pop()
|
483 |
+
st.session_state.cleaned_data = st.session_state.data_versions[-1].copy()
|
484 |
+
st.session_state.dataset_text = convert_df_to_text(st.session_state.cleaned_data)
|
485 |
+
st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
|
486 |
+
st.rerun()
|
487 |
+
|
488 |
+
with st.expander("🛠️ Data Cleaning Operations", expanded=True):
|
489 |
+
enhance_section_title("🔍 Missing Values Treatment")
|
490 |
+
missing_cols = df.columns[df.isna().any()].tolist()
|
491 |
+
if missing_cols:
|
492 |
+
cols = st.multiselect("Select columns with missing values", missing_cols)
|
493 |
+
method = st.selectbox("Choose imputation method", [
|
494 |
+
"Drop Missing Values", "Fill with Mean/Median", "Fill with Custom Value", "Forward Fill", "Backward Fill"
|
495 |
+
])
|
496 |
+
if method == "Fill with Custom Value":
|
497 |
+
custom_val = st.text_input("Enter custom value:")
|
498 |
+
if st.button("Apply Missing Value Treatment"):
|
499 |
+
new_df = df.copy()
|
500 |
+
if method == "Drop Missing Values":
|
501 |
+
new_df = new_df.dropna(subset=cols)
|
502 |
+
elif method == "Fill with Mean/Median":
|
503 |
+
for col in cols:
|
504 |
+
if pd.api.types.is_numeric_dtype(new_df[col]):
|
505 |
+
new_df[col] = new_df[col].fillna(new_df[col].median())
|
506 |
+
else:
|
507 |
+
new_df[col] = new_df[col].fillna(new_df[col].mode()[0])
|
508 |
+
elif method == "Fill with Custom Value" and custom_val:
|
509 |
+
new_df[cols] = new_df[cols].fillna(custom_val)
|
510 |
+
elif method == "Forward Fill":
|
511 |
+
new_df[cols] = new_df[cols].ffill()
|
512 |
+
elif method == "Backward Fill":
|
513 |
+
new_df[cols] = new_df[cols].bfill()
|
514 |
+
update_cleaned_data(new_df)
|
515 |
+
else:
|
516 |
+
st.success("✨ No missing values detected!")
|
517 |
+
|
518 |
+
enhance_section_title("🔄 Data Type Conversion")
|
519 |
+
col_to_convert = st.selectbox("Select column to convert", df.columns)
|
520 |
+
new_type = st.selectbox("Select new data type", ["String", "Integer", "Float", "Boolean", "Datetime"])
|
521 |
+
if new_type == "Datetime":
|
522 |
+
date_format = st.text_input("Enter date format (e.g., %Y-%m-%d):", "%Y-%m-%d")
|
523 |
+
if st.button("Convert Data Type"):
|
524 |
+
new_df = df.copy()
|
525 |
+
if new_type == "String":
|
526 |
+
new_df[col_to_convert] = new_df[col_to_convert].astype(str)
|
527 |
+
elif new_type == "Integer":
|
528 |
+
new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce').astype('Int64')
|
529 |
+
elif new_type == "Float":
|
530 |
+
new_df[col_to_convert] = pd.to_numeric(new_df[col_to_convert], errors='coerce')
|
531 |
+
elif new_type == "Boolean":
|
532 |
+
new_df[col_to_convert] = new_df[col_to_convert].astype(bool)
|
533 |
+
elif new_type == "Datetime":
|
534 |
+
new_df[col_to_convert] = pd.to_datetime(new_df[col_to_convert], format=date_format, errors='coerce')
|
535 |
+
update_cleaned_data(new_df)
|
536 |
+
|
537 |
+
enhance_section_title("🗑️ Drop Columns")
|
538 |
+
columns_to_drop = st.multiselect("Select columns to remove", df.columns)
|
539 |
+
if columns_to_drop and st.button("Confirm Column Removal"):
|
540 |
+
new_df = df.copy()
|
541 |
+
new_df = new_df.drop(columns=columns_to_drop)
|
542 |
+
update_cleaned_data(new_df)
|
543 |
+
|
544 |
+
enhance_section_title("🔢 Encoding Options")
|
545 |
+
encoding_method = st.radio("Choose encoding method", ("Label Encoding", "One-Hot Encoding"))
|
546 |
+
data_to_encode = st.multiselect("Select columns to encode", df.select_dtypes(include='object').columns)
|
547 |
+
if data_to_encode and st.button("Apply Encoding"):
|
548 |
+
new_df = df.copy()
|
549 |
+
if encoding_method == "Label Encoding":
|
550 |
+
for col in data_to_encode:
|
551 |
+
le = LabelEncoder()
|
552 |
+
new_df[col] = le.fit_transform(new_df[col].astype(str))
|
553 |
+
elif encoding_method == "One-Hot Encoding":
|
554 |
+
new_df = pd.get_dummies(new_df, columns=data_to_encode, drop_first=True, dtype=int)
|
555 |
+
update_cleaned_data(new_df)
|
556 |
+
|
557 |
+
enhance_section_title("📏 StandardScaler")
|
558 |
+
scale_cols = st.multiselect("Select numerical columns to scale", df.select_dtypes(include=np.number).columns)
|
559 |
+
if scale_cols and st.button("Apply StandardScaler"):
|
560 |
+
new_df = df.copy()
|
561 |
+
scaler = StandardScaler()
|
562 |
+
new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
|
563 |
+
update_cleaned_data(new_df)
|
564 |
+
|
565 |
+
elif app_mode == "EDA":
|
566 |
+
st.header("🔍 Interactive Data Explorer")
|
567 |
if 'cleaned_data' not in st.session_state:
|
568 |
+
st.warning("Please upload and clean data first.")
|
569 |
+
st.stop()
|
570 |
+
df = st.session_state.cleaned_data.copy()
|
571 |
+
|
572 |
+
enhance_section_title("Dataset Overview")
|
573 |
+
with st.container():
|
574 |
+
col1, col2, col3, col4 = st.columns(4)
|
575 |
+
col1.metric("Total Rows", df.shape[0])
|
576 |
+
col2.metric("Total Columns", df.shape[1])
|
577 |
+
missing_percentage = df.isna().sum().sum() / df.size * 100
|
578 |
+
col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
|
579 |
+
col4.metric("Duplicates", df.duplicated().sum())
|
580 |
+
|
581 |
+
tab1, tab2, tab3 = st.tabs(["Quick Preview", "Column Types", "Missing Matrix"])
|
582 |
+
with tab1:
|
583 |
+
st.write("First few rows of the dataset:")
|
584 |
+
st.dataframe(df.head(), use_container_width=True)
|
585 |
+
with tab2:
|
586 |
+
st.write("Column Data Types:")
|
587 |
+
type_counts = df.dtypes.value_counts().reset_index()
|
588 |
+
type_counts.columns = ['Type', 'Count']
|
589 |
+
st.dataframe(type_counts, use_container_width=True)
|
590 |
+
with tab3:
|
591 |
+
st.write("Missing Values Matrix:")
|
592 |
+
fig_missing = px.imshow(df.isna(), color_continuous_scale=['#e0e0e0', '#66c2a5'])
|
593 |
+
fig_missing.update_layout(coloraxis_colorscale=[[0, 'lightgrey'], [1, '#FF4B4B']])
|
594 |
+
st.plotly_chart(fig_missing, use_container_width=True)
|
595 |
+
|
596 |
+
enhance_section_title("Interactive Visualization Builder")
|
597 |
+
with st.container():
|
598 |
+
col1, col2 = st.columns([1, 3])
|
599 |
+
with col1:
|
600 |
+
plot_type = st.selectbox("Choose visualization type", [
|
601 |
+
"Scatter Plot", "Histogram", "Box Plot", "Line Chart", "Bar Chart", "Correlation Matrix"
|
602 |
+
])
|
603 |
+
x_axis = st.selectbox("X-axis", df.columns) if plot_type != "Correlation Matrix" else None
|
604 |
+
y_axis = st.selectbox("Y-axis", df.columns) if plot_type in ["Scatter Plot", "Box Plot", "Line Chart"] else None
|
605 |
+
color_by = st.selectbox("Color encoding", ["None"] + df.columns.tolist(), format_func=lambda x: "No color" if x == "None" else x) if plot_type != "Correlation Matrix" else None
|
606 |
+
|
607 |
+
with col2:
|
608 |
+
try:
|
609 |
+
fig = None
|
610 |
+
if plot_type == "Scatter Plot" and x_axis and y_axis:
|
611 |
+
fig = px.scatter(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Scatter Plot of {x_axis} vs {y_axis}')
|
612 |
+
elif plot_type == "Histogram" and x_axis:
|
613 |
+
fig = px.histogram(df, x=x_axis, color=color_by if color_by != "None" else None, nbins=30, title=f'Histogram of {x_axis}')
|
614 |
+
elif plot_type == "Box Plot" and x_axis and y_axis:
|
615 |
+
fig = px.box(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Box Plot of {x_axis} vs {y_axis}')
|
616 |
+
elif plot_type == "Line Chart" and x_axis and y_axis:
|
617 |
+
fig = px.line(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Line Chart of {x_axis} vs {y_axis}')
|
618 |
+
elif plot_type == "Bar Chart" and x_axis:
|
619 |
+
fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None, title=f'Bar Chart of {x_axis}')
|
620 |
+
elif plot_type == "Correlation Matrix":
|
621 |
+
numeric_df = df.select_dtypes(include=np.number)
|
622 |
+
if len(numeric_df.columns) > 1:
|
623 |
+
corr = numeric_df.corr()
|
624 |
+
fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1, title='Correlation Matrix')
|
625 |
+
|
626 |
+
if fig:
|
627 |
+
fig.update_layout(template="plotly_white")
|
628 |
+
st.plotly_chart(fig, use_container_width=True)
|
629 |
+
st.session_state.last_plot = {
|
630 |
+
"type": plot_type,
|
631 |
+
"x": x_axis,
|
632 |
+
"y": y_axis,
|
633 |
+
"data": df[[x_axis, y_axis]].to_json() if y_axis else df[[x_axis]].to_json()
|
634 |
+
}
|
635 |
+
plot_text = extract_plot_data(st.session_state.last_plot, df)
|
636 |
+
st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
|
637 |
+
with st.expander("Extracted Plot Data"):
|
638 |
+
st.text(plot_text)
|
639 |
+
else:
|
640 |
+
st.error("Please provide required inputs for the selected plot type.")
|
641 |
+
except Exception as e:
|
642 |
+
st.error(f"Couldn't create visualization: {str(e)}")
|
643 |
+
|
644 |
+
# Chatbot Section
|
645 |
+
st.markdown("---")
|
646 |
+
st.markdown('<div class="chat-container">', unsafe_allow_html=True)
|
647 |
+
st.subheader("💬 AI Chatbot Assistant (RAG Enabled)")
|
648 |
+
st.info("Ask about your data or app features! Try: 'drop columns X, Y', 'scatter plot of X vs Y', 'analyze plot'")
|
649 |
+
|
650 |
+
for message in st.session_state.chat_history:
|
651 |
+
with st.chat_message(message["role"]):
|
652 |
+
st.markdown(f'<div class="{message["role"]}-message">{message["content"]}</div>', unsafe_allow_html=True)
|
653 |
+
|
654 |
+
user_input = st.chat_input("Ask me anything...")
|
655 |
+
if user_input:
|
656 |
+
st.session_state.chat_history.append({"role": "user", "content": user_input})
|
657 |
+
with st.chat_message("user"):
|
658 |
+
st.markdown(f'<div class="user-message">{user_input}</div>', unsafe_allow_html=True)
|
659 |
+
with st.spinner("Processing..."):
|
660 |
+
func, param = parse_command(user_input)
|
661 |
+
if func:
|
662 |
+
response = func(param) if param else func(None)
|
663 |
+
else:
|
664 |
+
response = get_chatbot_response(user_input, app_mode, st.session_state.vector_store, model)
|
665 |
+
st.session_state.chat_history.append({"role": "assistant", "content": response})
|
666 |
+
with st.chat_message("assistant"):
|
667 |
+
st.markdown(f'<div class="bot-message">{response}</div>', unsafe_allow_html=True)
|
668 |
+
|
669 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
670 |
+
|
671 |
+
# Footer
|
672 |
+
st.markdown("""
|
673 |
+
<div class="footer">
|
674 |
+
<div>Built with <span class="tech-badge">Streamlit</span> + <span class="tech-badge">Groq</span> + <span class="tech-badge">LangChain</span> + <span class="tech-badge">FAISS</span></div>
|
675 |
+
<div style="margin-top: 8px;">Fast inference for data insights</div>
|
676 |
+
</div>
|
677 |
+
""", unsafe_allow_html=True)
|
678 |
|
679 |
# Entry-point guard: call main() only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
|
680 |
main()
|