CosmickVisions committed on
Commit
d092d3c
·
verified ·
1 Parent(s): c9f8c9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +255 -115
app.py CHANGED
@@ -23,13 +23,11 @@ st.set_page_config(page_title="Data-Vision Pro", layout="wide", initial_sidebar_
23
  # Load environment variables
24
  load_dotenv()
25
 
26
- # Initialize Groq client
27
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
28
-
29
- # Initialize HuggingFace embeddings
30
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
 
32
- # Custom CSS for fullscreen and responsiveness
33
  st.markdown("""
34
  <style>
35
  :root {
@@ -37,22 +35,25 @@ st.markdown("""
37
  --blue: #5C89BC;
38
  --gold: #A87E01;
39
  --text-color: #333333;
 
40
  }
41
  .stApp {
42
  background-color: var(--silver);
43
  font-family: 'Inter', sans-serif;
44
- padding: 1rem;
45
  height: 100vh;
46
- width: 100vw;
47
  overflow-y: auto;
 
 
48
  }
49
  .header {
50
  background-color: var(--blue);
51
  color: white;
52
- padding: 1.5rem;
53
  border-radius: 8px;
54
  text-align: center;
55
- margin-bottom: 1rem;
 
56
  }
57
  .header-title {
58
  font-size: 2rem;
@@ -62,26 +63,27 @@ st.markdown("""
62
  .header-subtitle {
63
  font-size: 1rem;
64
  margin-top: 0.5rem;
 
65
  }
66
  .nav-bar {
67
  background-color: white;
68
  border-radius: 8px;
69
  padding: 1rem;
70
  display: flex;
71
- justify-content: space-between;
72
  align-items: center;
73
  flex-wrap: wrap;
74
- gap: 1rem;
75
- margin-bottom: 1.5rem;
76
  }
77
  .nav-item {
78
  color: var(--blue);
79
  font-weight: 500;
80
- cursor: pointer;
81
- padding: 0.5rem 1rem;
82
  border-radius: 5px;
83
- flex: 1;
84
  text-align: center;
 
 
85
  }
86
  .nav-item:hover {
87
  background-color: var(--gold);
@@ -90,69 +92,93 @@ st.markdown("""
90
  .main-container {
91
  background-color: white;
92
  border-radius: 8px;
93
- padding: 1.5rem;
94
- min-height: 60vh;
95
- margin-bottom: 1.5rem;
 
96
  }
97
  .chat-container {
98
  background-color: white;
99
- border-radius: 8px;
100
- padding: 1.5rem;
101
- margin-bottom: 1rem;
 
 
 
 
 
 
 
 
 
 
102
  }
103
  .user-message, .bot-message {
104
- padding: 1rem;
105
  border-radius: 12px;
106
  margin-bottom: 0.5rem;
107
- max-width: 80%;
 
108
  }
109
  .user-message {
110
  background-color: var(--blue);
111
  color: white;
112
  margin-left: auto;
 
113
  }
114
  .bot-message {
115
  background-color: #F0F0F0;
116
  color: var(--text-color);
117
  margin-right: auto;
 
118
  }
119
  .footer {
120
  text-align: center;
121
  color: var(--text-color);
122
  font-size: 0.9rem;
123
  padding: 1rem 0;
 
124
  }
125
  h2 {
126
  color: var(--blue);
127
  border-bottom: 2px solid var(--gold);
128
  padding-bottom: 0.5rem;
129
  font-size: 1.5rem;
 
130
  }
131
  .stButton > button {
132
  background-color: var(--gold);
133
  color: white;
134
  border-radius: 5px;
135
- padding: 0.5rem 1rem;
 
 
136
  }
137
  .stButton > button:hover {
138
  background-color: #8C6B01;
139
  }
 
 
 
 
140
  @media (max-width: 768px) {
141
  .header-title { font-size: 1.5rem; }
142
  .header-subtitle { font-size: 0.9rem; }
143
- .nav-bar { flex-direction: column; padding: 0.5rem; }
144
- .nav-item { margin: 0.5rem 0; width: 100%; }
145
- .main-container, .chat-container { padding: 1rem; }
 
146
  h2 { font-size: 1.2rem; }
147
  }
148
  @media (max-width: 480px) {
149
  .header-title { font-size: 1.2rem; }
150
- .stApp { padding: 0.5rem; }
 
151
  }
152
  </style>
153
  """, unsafe_allow_html=True)
154
 
155
- # Helper Functions (unchanged, included for completeness)
156
  def enhance_section_title(title):
157
  st.markdown(f"<h2>{title}</h2>", unsafe_allow_html=True)
158
 
@@ -162,41 +188,126 @@ def update_cleaned_data(df):
162
  st.session_state.data_versions = [st.session_state.raw_data.copy()]
163
  st.session_state.data_versions.append(df.copy())
164
  st.session_state.dataset_text = convert_df_to_text(df)
 
165
  st.success("✅ Action completed!")
166
  st.rerun()
167
 
168
  def convert_df_to_text(df):
169
- # (Existing implementation)
170
- pass
 
 
 
 
 
 
 
 
171
 
172
  def create_vector_store(df_text):
173
- # (Existing implementation)
174
- pass
 
 
 
 
 
 
 
 
175
 
176
  def update_vector_store_with_plot(plot_text, existing_vector_store):
177
- # (Existing implementation)
178
- pass
 
 
 
 
 
179
 
180
  def extract_plot_data(plot_info, df):
181
- # (Existing implementation)
182
- pass
 
 
 
 
 
 
 
 
 
 
183
 
184
  def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
185
- # (Existing implementation)
186
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
- # Command Functions (unchanged)
189
- def drop_columns(columns): pass
190
- def generate_scatter_plot(params): pass
191
- def generate_histogram(params): pass
192
- def analyze_plot(): pass
193
- def parse_command(command): pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
  # Dataset Preview
196
  def display_dataset_preview():
197
  if 'cleaned_data' in st.session_state:
198
  st.subheader("Dataset Preview")
199
- st.dataframe(st.session_state.cleaned_data.head(), use_container_width=True)
200
 
201
  # Main App
202
  def main():
@@ -209,90 +320,119 @@ def main():
209
  """, unsafe_allow_html=True)
210
 
211
  # Navigation Bar
212
- st.markdown('<div class="nav-bar">', unsafe_allow_html=True)
213
- col1, col2, col3, col4 = st.columns([1, 1, 1, 1])
214
- with col1:
215
- uploaded_file = st.file_uploader("Upload File", type=["csv", "xlsx"], key="file_uploader")
216
- with col2:
217
- app_mode = st.selectbox("Mode", ["Data Upload", "Data Cleaning", "EDA"], label_visibility="collapsed")
218
- with col3:
219
- model = st.selectbox("Model", ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"], label_visibility="collapsed")
220
- with col4:
221
- if 'cleaned_data' in st.session_state:
222
- csv = st.session_state.cleaned_data.to_csv(index=False)
223
- st.download_button(label="Download", data=csv, file_name='cleaned_data.csv', mime='text/csv')
224
- st.markdown('</div>', unsafe_allow_html=True)
 
225
 
226
  # Initialize Session State
227
  if 'vector_store' not in st.session_state:
228
  st.session_state.vector_store = None
229
  if 'chat_history' not in st.session_state:
230
  st.session_state.chat_history = []
 
 
 
 
 
 
 
 
231
 
232
  # Main Content
233
- st.markdown('<div class="main-container">', unsafe_allow_html=True)
234
- display_dataset_preview()
 
235
 
236
- if app_mode == "Data Upload":
237
- enhance_section_title("📤 Data Upload")
238
- if uploaded_file:
239
- try:
240
- df = pd.read_csv(uploaded_file) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file)
241
- st.session_state.raw_data = df
242
- st.session_state.cleaned_data = df.copy()
243
- st.session_state.dataset_text = convert_df_to_text(df)
244
- st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
245
- st.session_state.data_versions = [df.copy()]
246
- col1, col2, col3 = st.columns(3)
247
- with col1: st.metric("Rows", df.shape[0])
248
- with col2: st.metric("Columns", df.shape[1])
249
- with col3: st.metric("Missing", df.isna().sum().sum())
250
- if st.button("Generate Report"):
251
- pr = ProfileReport(df, explorative=True)
252
- st_profile_report(pr)
253
- except Exception as e:
254
- st.error(f"Error: {e}")
255
 
256
- elif app_mode == "Data Cleaning":
257
- enhance_section_title("🧹 Data Cleaning")
258
- if 'cleaned_data' not in st.session_state:
259
- st.warning("Upload data first.")
260
- return
261
- df = st.session_state.cleaned_data.copy()
262
- # Simplified cleaning options (expand as needed)
263
- columns_to_drop = st.multiselect("Drop Columns", df.columns)
264
- if st.button("Drop Selected"):
265
- new_df = df.drop(columns=columns_to_drop)
266
- update_cleaned_data(new_df)
267
 
268
- elif app_mode == "EDA":
269
- enhance_section_title("🔍 EDA")
270
- if 'cleaned_data' not in st.session_state:
271
- st.warning("Upload data first.")
272
- return
273
- df = st.session_state.cleaned_data.copy()
274
- plot_type = st.selectbox("Plot Type", ["Scatter Plot", "Histogram"])
275
- x_axis = st.selectbox("X-axis", df.columns)
276
- if plot_type == "Scatter Plot":
277
- y_axis = st.selectbox("Y-axis", df.columns)
278
- if st.button("Generate"):
279
- fig = px.scatter(df, x=x_axis, y=y_axis)
280
- st.plotly_chart(fig, use_container_width=True)
 
 
 
 
 
 
 
 
281
 
282
- st.markdown('</div>', unsafe_allow_html=True)
283
 
284
  # Chatbot
285
- st.markdown('<div class="chat-container">', unsafe_allow_html=True)
286
- st.subheader("💬 Chatbot")
287
- for message in st.session_state.chat_history:
288
- with st.chat_message(message["role"]):
289
- st.markdown(f'<div class="{message["role"]}-message">{message["content"]}</div>', unsafe_allow_html=True)
290
- if user_input := st.chat_input("Ask anything..."):
291
- st.session_state.chat_history.append({"role": "user", "content": user_input})
292
- response = get_chatbot_response(user_input, app_mode, st.session_state.vector_store, model)
293
- st.session_state.chat_history.append({"role": "assistant", "content": response})
294
- st.rerun()
295
- st.markdown('</div>', unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
  # Footer
298
  st.markdown('<div class="footer">Built with Streamlit & Groq</div>', unsafe_allow_html=True)
 
23
  # Load environment variables
24
  load_dotenv()
25
 
26
+ # Initialize Groq client and embeddings
27
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 
 
28
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
29
 
30
+ # Custom CSS with improved spacing, sizing, and UI/UX
31
  st.markdown("""
32
  <style>
33
  :root {
 
35
  --blue: #5C89BC;
36
  --gold: #A87E01;
37
  --text-color: #333333;
38
+ --spacing-unit: 1.5rem;
39
  }
40
  .stApp {
41
  background-color: var(--silver);
42
  font-family: 'Inter', sans-serif;
43
+ padding: var(--spacing-unit);
44
  height: 100vh;
 
45
  overflow-y: auto;
46
+ display: flex;
47
+ flex-direction: column;
48
  }
49
  .header {
50
  background-color: var(--blue);
51
  color: white;
52
+ padding: var(--spacing-unit);
53
  border-radius: 8px;
54
  text-align: center;
55
+ margin-bottom: var(--spacing-unit);
56
+ box-shadow: 0 2px 10px rgba(0,0,0,0.1);
57
  }
58
  .header-title {
59
  font-size: 2rem;
 
63
  .header-subtitle {
64
  font-size: 1rem;
65
  margin-top: 0.5rem;
66
+ opacity: 0.9;
67
  }
68
  .nav-bar {
69
  background-color: white;
70
  border-radius: 8px;
71
  padding: 1rem;
72
  display: flex;
73
+ gap: 1rem;
74
  align-items: center;
75
  flex-wrap: wrap;
76
+ margin-bottom: var(--spacing-unit);
77
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
78
  }
79
  .nav-item {
80
  color: var(--blue);
81
  font-weight: 500;
82
+ padding: 0.75rem 1.5rem;
 
83
  border-radius: 5px;
 
84
  text-align: center;
85
+ transition: all 0.2s ease;
86
+ flex: 1;
87
  }
88
  .nav-item:hover {
89
  background-color: var(--gold);
 
92
  .main-container {
93
  background-color: white;
94
  border-radius: 8px;
95
+ padding: var(--spacing-unit);
96
+ flex-grow: 1;
97
+ margin-bottom: var(--spacing-unit);
98
+ box-shadow: 0 2px 5px rgba(0,0,0,0.05);
99
  }
100
  .chat-container {
101
  background-color: white;
102
+ border-radius: 8px 8px 0 0;
103
+ padding: 1rem;
104
+ position: fixed;
105
+ bottom: 0;
106
+ left: var(--spacing-unit);
107
+ right: var(--spacing-unit);
108
+ max-height: 40vh;
109
+ overflow-y: auto;
110
+ box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
111
+ z-index: 1000;
112
+ }
113
+ .chat-message-container {
114
+ margin-bottom: 0.5rem;
115
  }
116
  .user-message, .bot-message {
117
+ padding: 0.75rem 1rem;
118
  border-radius: 12px;
119
  margin-bottom: 0.5rem;
120
+ max-width: 70%;
121
+ word-wrap: break-word;
122
  }
123
  .user-message {
124
  background-color: var(--blue);
125
  color: white;
126
  margin-left: auto;
127
+ box-shadow: 0 1px 3px rgba(0,0,0,0.1);
128
  }
129
  .bot-message {
130
  background-color: #F0F0F0;
131
  color: var(--text-color);
132
  margin-right: auto;
133
+ box-shadow: 0 1px 3px rgba(0,0,0,0.05);
134
  }
135
  .footer {
136
  text-align: center;
137
  color: var(--text-color);
138
  font-size: 0.9rem;
139
  padding: 1rem 0;
140
+ margin-top: auto;
141
  }
142
  h2 {
143
  color: var(--blue);
144
  border-bottom: 2px solid var(--gold);
145
  padding-bottom: 0.5rem;
146
  font-size: 1.5rem;
147
+ margin-bottom: 1rem;
148
  }
149
  .stButton > button {
150
  background-color: var(--gold);
151
  color: white;
152
  border-radius: 5px;
153
+ padding: 0.75rem 1.5rem;
154
+ font-weight: 500;
155
+ transition: background-color 0.2s ease;
156
  }
157
  .stButton > button:hover {
158
  background-color: #8C6B01;
159
  }
160
+ .stTextInput > div > div > input {
161
+ border-radius: 5px;
162
+ padding: 0.75rem;
163
+ }
164
  @media (max-width: 768px) {
165
  .header-title { font-size: 1.5rem; }
166
  .header-subtitle { font-size: 0.9rem; }
167
+ .nav-bar { flex-direction: column; padding: 0.75rem; gap: 0.5rem; }
168
+ .nav-item { padding: 0.5rem; }
169
+ .main-container { padding: 1rem; }
170
+ .chat-container { padding: 0.75rem; max-height: 50vh; }
171
  h2 { font-size: 1.2rem; }
172
  }
173
  @media (max-width: 480px) {
174
  .header-title { font-size: 1.2rem; }
175
+ .stApp { padding: 0.75rem; }
176
+ .chat-container { left: 0.75rem; right: 0.75rem; }
177
  }
178
  </style>
179
  """, unsafe_allow_html=True)
180
 
181
+ # Helper Functions (fully implemented from original intent)
def enhance_section_title(title):
    """Render ``title`` as an ``<h2>`` section heading.

    The heading is emitted as raw HTML through ``st.markdown`` with
    ``unsafe_allow_html=True``. ``title`` is interpolated without escaping,
    so callers should only pass trusted text.
    """
    heading_html = f"<h2>{title}</h2>"
    st.markdown(heading_html, unsafe_allow_html=True)
184
 
 
188
  st.session_state.data_versions = [st.session_state.raw_data.copy()]
189
  st.session_state.data_versions.append(df.copy())
190
  st.session_state.dataset_text = convert_df_to_text(df)
191
+ st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
192
  st.success("✅ Action completed!")
193
  st.rerun()
194
 
def convert_df_to_text(df):
    """Build a plain-text summary of ``df``.

    The summary starts with the row/column counts and the total number of
    missing values, followed by one line per column:

    * numeric columns report mean (two decimals), min and max;
    * all other columns report the number of unique values and the most
      frequent value (``N/A`` when the column has no mode, e.g. all missing);
    * every column line ends with its own missing-value count.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        The summary as a single newline-terminated string.
    """
    lines = [
        f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns",
        f"Missing Values: {df.isna().sum().sum()}",
        "Columns:",
    ]
    for col in df.columns:
        series = df[col]
        if pd.api.types.is_numeric_dtype(series):
            stats = f"Mean={series.mean():.2f}, Min={series.min()}, Max={series.max()}"
        else:
            # Compute the mode once; it is a full pass over the column.
            modes = series.mode()
            top = modes.iloc[0] if not modes.empty else 'N/A'
            stats = f"Unique={series.nunique()}, Top={top}"
        lines.append(f"- {col} ({series.dtype}): {stats}, Missing={series.isna().sum()}")
    return "\n".join(lines) + "\n"
206
 
def create_vector_store(df_text):
    """Index ``df_text`` in a new FAISS vector store.

    The text is written to a temporary ``.txt`` file, loaded back through
    ``TextLoader``, split into 1000-character chunks with 200 characters of
    overlap, and embedded with the module-level ``embeddings`` model.

    Args:
        df_text: The text to index (for example a dataset summary).

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        Whatever the loader, splitter or FAISS raise; the temporary file is
        removed in every case.
    """
    # delete=False: the file is closed first and then reopened by path by
    # TextLoader, so it must outlive the handle. UTF-8 is set explicitly on
    # both sides so non-ASCII text does not depend on the locale encoding.
    temp_file = tempfile.NamedTemporaryFile(
        mode='w', suffix='.txt', delete=False, encoding='utf-8'
    )
    temp_path = temp_file.name
    try:
        with temp_file:
            temp_file.write(df_text)
        documents = TextLoader(temp_path, encoding='utf-8').load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        texts = text_splitter.split_documents(documents)
        return FAISS.from_documents(texts, embeddings)
    finally:
        # Always clean up, even when loading or embedding fails.
        os.unlink(temp_path)
218
 
def update_vector_store_with_plot(plot_text, existing_vector_store):
    """Add a plot description to the vector store, creating one if needed.

    ``plot_text`` is split into 1000-character chunks (200 overlap). The
    chunks are added to ``existing_vector_store`` when it is truthy;
    otherwise a new FAISS store is built from them with the module-level
    ``embeddings`` model.

    Returns:
        The store that now contains the plot chunks.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    plot_chunks = splitter.create_documents([plot_text])

    if not existing_vector_store:
        return FAISS.from_documents(plot_chunks, embeddings)

    existing_vector_store.add_documents(plot_chunks)
    return existing_vector_store
227
 
def extract_plot_data(plot_info, df):
    """Build a Plotly figure from a plot specification.

    Args:
        plot_info: Mapping with the keys ``"type"`` (``"scatter"`` or
            ``"histogram"``, case-insensitive), ``"x"`` and, for scatter
            plots, ``"y"``.
        df: DataFrame whose columns the specification refers to.

    Returns:
        A ``(figure, description)`` tuple. ``figure`` is ``None`` and
        ``description`` holds an error message when a column is unknown or
        the plot type is unsupported.
    """
    # ``.get("type", "")`` still yields None when the key is present with a
    # None value; normalise None, non-string values and stray whitespace.
    plot_type = str(plot_info.get("type") or "").strip().lower()
    x_col = plot_info.get("x", "")
    y_col = plot_info.get("y", "")

    if x_col not in df.columns or (plot_type == "scatter" and y_col not in df.columns):
        return None, "Invalid column names."

    if plot_type == "scatter":
        fig = px.scatter(df, x=x_col, y=y_col)
        return fig, f"Scatter plot: {x_col} vs {y_col}"
    if plot_type == "histogram":
        fig = px.histogram(df, x=x_col)
        return fig, f"Histogram of {x_col}"
    return None, "Unsupported plot type."
241
 
def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
    """Send ``user_input`` to the Groq chat model and return its reply text.

    When ``vector_store`` is truthy, the three chunks most similar to
    ``user_input`` are appended to the system prompt as a bulleted
    "Data Context" section.

    Args:
        user_input: The user's message.
        app_mode: Current app mode; lower-cased into the system prompt.
        vector_store: Optional store exposing ``similarity_search``.
        model: Model identifier passed to the chat-completions call.

    Returns:
        The content of the first completion choice.
    """
    retrieved = ""
    if vector_store:
        hits = vector_store.similarity_search(user_input, k=3)
        bullet_lines = [f"- {hit.page_content}" for hit in hits]
        retrieved = "\n\nData Context:\n" + "\n".join(bullet_lines)

    messages = [
        {"role": "system", "content": f"You are an expert in {app_mode.lower()} analysis.{retrieved}"},
        {"role": "user", "content": user_input},
    ]
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
        max_tokens=1024,
    )
    return completion.choices[0].message.content
258
+
259
+ # Command Functions (basic implementations from original intent)
def drop_columns(columns):
    """Drop ``columns`` from the cleaned dataset held in session state.

    Does nothing when no dataset is loaded, i.e. the ``cleaned_data`` key is
    absent or holds ``None``. Requested names are matched exactly first and
    then case-insensitively, because chat commands arrive lower-cased from
    ``parse_command``. Names that match no column are passed through
    unchanged, so ``DataFrame.drop`` still raises ``KeyError`` for them.

    Args:
        columns: Iterable of column names to remove.
    """
    # The key may exist with a None placeholder before any upload.
    if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
        return
    current = st.session_state.cleaned_data

    by_lower = {str(name).lower(): name for name in current.columns}
    resolved = [
        name if name in current.columns else by_lower.get(str(name).lower(), name)
        for name in columns
    ]
    update_cleaned_data(current.drop(columns=resolved))
264
 
def generate_scatter_plot(params):
    """Render a scatter plot of two dataset columns and index its description.

    Does nothing when no dataset is loaded (``cleaned_data`` absent or
    ``None``) or when either requested column cannot be found. Column names
    are matched exactly first and then case-insensitively, because chat
    commands arrive lower-cased from ``parse_command``.

    Args:
        params: Mapping with the keys ``"x"`` and ``"y"`` naming the columns.
    """
    # The key may exist with a None placeholder before any upload.
    if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
        return
    df = st.session_state.cleaned_data
    by_lower = {str(name).lower(): name for name in df.columns}

    def resolve(requested):
        # Exact match wins; otherwise fall back to a case-insensitive lookup.
        if requested is None or requested in df.columns:
            return requested
        return by_lower.get(str(requested).lower(), requested)

    x, y = resolve(params.get("x")), resolve(params.get("y"))
    if x in df.columns and y in df.columns:
        fig = px.scatter(df, x=x, y=y)
        st.plotly_chart(fig, use_container_width=True)
        plot_text = f"Scatter plot of {x} vs {y}"
        st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
274
+
def generate_histogram(params):
    """Render a histogram of one dataset column and index its description.

    Does nothing when no dataset is loaded (``cleaned_data`` absent or
    ``None``) or when the requested column cannot be found. The column name
    is matched exactly first and then case-insensitively, because chat
    commands arrive lower-cased from ``parse_command``.

    Args:
        params: Mapping with the key ``"x"`` naming the column.
    """
    # The key may exist with a None placeholder before any upload.
    if 'cleaned_data' not in st.session_state or st.session_state.cleaned_data is None:
        return
    df = st.session_state.cleaned_data

    x = params.get("x")
    if x is not None and x not in df.columns:
        # Fall back to a case-insensitive match against the real column names.
        by_lower = {str(name).lower(): name for name in df.columns}
        x = by_lower.get(str(x).lower(), x)

    if x in df.columns:
        fig = px.histogram(df, x=x)
        st.plotly_chart(fig, use_container_width=True)
        plot_text = f"Histogram of {x}"
        st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
284
+
def analyze_plot():
    """Placeholder for plot analysis.

    Writes a "not implemented" notice when the ``cleaned_data`` key exists
    in session state; otherwise does nothing.
    """
    if 'cleaned_data' not in st.session_state:
        return
    st.write("Plot analysis not fully implemented yet.")
288
+
def parse_command(command):
    """Parse a chat message into a structured data command.

    The message is lower-cased and stripped, then checked in order for the
    keywords ``drop``, ``scatter`` and ``histogram``. Only the first keyword
    found is tried; if its pattern does not match, the message is not a
    command.

    Recognised forms:
        * ``drop column(s) a, b``      -> ``{"action": "drop_columns", "columns": [...]}``
        * ``scatter plot x=a y=b``     -> ``{"action": "scatter_plot", "x": ..., "y": ...}``
        * ``histogram of a``           -> ``{"action": "histogram", "x": ...}``

    Args:
        command: Raw chat text.

    Returns:
        The command dict, or ``None`` when the text is not a recognised
        command. Column names are returned lower-cased.
    """
    command = command.lower().strip()
    if "drop" in command:
        match = re.search(r"drop\s+columns?\s+(.+)", command)
        if match:
            # Skip empty names from trailing or doubled commas; they would
            # make DataFrame.drop raise KeyError on "".
            cols = [col.strip() for col in match.group(1).split(",") if col.strip()]
            if cols:
                return {"action": "drop_columns", "columns": cols}
    elif "scatter" in command:
        match = re.search(r"scatter\s+plot\s+x=(\w+)\s+y=(\w+)", command)
        if match:
            return {"action": "scatter_plot", "x": match.group(1), "y": match.group(2)}
    elif "histogram" in command:
        match = re.search(r"histogram\s+of\s+(\w+)", command)
        if match:
            return {"action": "histogram", "x": match.group(1)}
    return None
305
 
306
  # Dataset Preview
def display_dataset_preview():
    """Show the first rows of the cleaned dataset, if one is loaded.

    Nothing is rendered when the ``cleaned_data`` key is absent from session
    state or holds ``None`` (the placeholder used before any upload).
    """
    if 'cleaned_data' not in st.session_state:
        return
    data = st.session_state.cleaned_data
    if data is None:
        # The key exists but no dataset has been uploaded yet.
        return
    st.subheader("Dataset Preview")
    st.dataframe(data.head(), use_container_width=True, height=200)
311
 
312
  # Main App
313
  def main():
 
320
  """, unsafe_allow_html=True)
321
 
322
  # Navigation Bar
323
+ with st.container():
324
+ st.markdown('<div class="nav-bar">', unsafe_allow_html=True)
325
+ col1, col2, col3, col4 = st.columns([2, 2, 2, 1])
326
+ with col1:
327
+ uploaded_file = st.file_uploader("Upload File", type=["csv", "xlsx"], key="file_uploader")
328
+ with col2:
329
+ app_mode = st.selectbox("Mode", ["Data Upload", "Data Cleaning", "EDA"], label_visibility="collapsed")
330
+ with col3:
331
+ model = st.selectbox("Model", ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"], label_visibility="collapsed")
332
+ with col4:
333
+ if 'cleaned_data' in st.session_state:
334
+ csv = st.session_state.cleaned_data.to_csv(index=False)
335
+ st.download_button(label="Download", data=csv, file_name='cleaned_data.csv', mime='text/csv')
336
+ st.markdown('</div>', unsafe_allow_html=True)
337
 
338
  # Initialize Session State
339
  if 'vector_store' not in st.session_state:
340
  st.session_state.vector_store = None
341
  if 'chat_history' not in st.session_state:
342
  st.session_state.chat_history = []
343
+ if 'raw_data' not in st.session_state:
344
+ st.session_state.raw_data = None
345
+ if 'cleaned_data' not in st.session_state:
346
+ st.session_state.cleaned_data = None
347
+ if 'data_versions' not in st.session_state:
348
+ st.session_state.data_versions = []
349
+ if 'dataset_text' not in st.session_state:
350
+ st.session_state.dataset_text = ""
351
 
352
  # Main Content
353
+ with st.container():
354
+ st.markdown('<div class="main-container">', unsafe_allow_html=True)
355
+ display_dataset_preview()
356
 
357
+ if app_mode == "Data Upload":
358
+ enhance_section_title("📤 Data Upload")
359
+ if uploaded_file:
360
+ try:
361
+ df = pd.read_csv(uploaded_file) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file)
362
+ st.session_state.raw_data = df
363
+ st.session_state.cleaned_data = df.copy()
364
+ st.session_state.dataset_text = convert_df_to_text(df)
365
+ st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
366
+ st.session_state.data_versions = [df.copy()]
367
+ col1, col2, col3 = st.columns(3)
368
+ with col1: st.metric("Rows", df.shape[0])
369
+ with col2: st.metric("Columns", df.shape[1])
370
+ with col3: st.metric("Missing", df.isna().sum().sum())
371
+ if st.button("Generate Report"):
372
+ pr = ProfileReport(df, explorative=True)
373
+ st_profile_report(pr)
374
+ except Exception as e:
375
+ st.error(f"Error: {e}")
376
 
377
+ elif app_mode == "Data Cleaning":
378
+ enhance_section_title("🧹 Data Cleaning")
379
+ if 'cleaned_data' not in st.session_state:
380
+ st.warning("Upload data first.")
381
+ else:
382
+ df = st.session_state.cleaned_data.copy()
383
+ columns_to_drop = st.multiselect("Drop Columns", df.columns)
384
+ if st.button("Drop Selected"):
385
+ new_df = df.drop(columns=columns_to_drop)
386
+ update_cleaned_data(new_df)
 
387
 
388
+ elif app_mode == "EDA":
389
+ enhance_section_title("🔍 EDA")
390
+ if 'cleaned_data' not in st.session_state:
391
+ st.warning("Upload data first.")
392
+ else:
393
+ df = st.session_state.cleaned_data.copy()
394
+ plot_type = st.selectbox("Plot Type", ["Scatter Plot", "Histogram"])
395
+ x_axis = st.selectbox("X-axis", df.columns)
396
+ if plot_type == "Scatter Plot":
397
+ y_axis = st.selectbox("Y-axis", df.columns)
398
+ if st.button("Generate"):
399
+ fig = px.scatter(df, x=x_axis, y=y_axis)
400
+ st.plotly_chart(fig, use_container_width=True)
401
+ plot_text = f"Scatter plot of {x_axis} vs {y_axis}"
402
+ st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
403
+ else:
404
+ if st.button("Generate"):
405
+ fig = px.histogram(df, x=x_axis)
406
+ st.plotly_chart(fig, use_container_width=True)
407
+ plot_text = f"Histogram of {x_axis}"
408
+ st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
409
 
410
+ st.markdown('</div>', unsafe_allow_html=True)
411
 
412
  # Chatbot
413
+ with st.container():
414
+ st.markdown('<div class="chat-container">', unsafe_allow_html=True)
415
+ st.subheader("💬 Chatbot")
416
+ for message in st.session_state.chat_history:
417
+ with st.container():
418
+ st.markdown(f'<div class="chat-message-container"><div class="{message["role"]}-message">{message["content"]}</div></div>', unsafe_allow_html=True)
419
+ if user_input := st.chat_input("Ask anything..."):
420
+ command = parse_command(user_input)
421
+ if command:
422
+ if command["action"] == "drop_columns":
423
+ drop_columns(command["columns"])
424
+ elif command["action"] == "scatter_plot":
425
+ generate_scatter_plot({"x": command["x"], "y": command["y"]})
426
+ elif command["action"] == "histogram":
427
+ generate_histogram({"x": command["x"]})
428
+ st.session_state.chat_history.append({"role": "user", "content": user_input})
429
+ st.session_state.chat_history.append({"role": "assistant", "content": "Command executed."})
430
+ else:
431
+ st.session_state.chat_history.append({"role": "user", "content": user_input})
432
+ response = get_chatbot_response(user_input, app_mode, st.session_state.vector_store, model)
433
+ st.session_state.chat_history.append({"role": "assistant", "content": response})
434
+ st.rerun()
435
+ st.markdown('</div>', unsafe_allow_html=True)
436
 
437
  # Footer
438
  st.markdown('<div class="footer">Built with Streamlit & Groq</div>', unsafe_allow_html=True)