CosmickVisions committed on
Commit
782d38d
·
verified ·
1 Parent(s): 428cd3b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -191
app.py CHANGED
@@ -1,30 +1,35 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import plotly.express as px
4
  import numpy as np
5
- from sklearn.model_selection import train_test_split
6
- from sklearn.neural_network import MLPClassifier, MLPRegressor
7
- from sklearn.cluster import KMeans
8
- from sklearn.metrics import accuracy_score, r2_score, silhouette_score
9
- from sklearn.preprocessing import StandardScaler
10
  from ydata_profiling import ProfileReport
11
  from streamlit_pandas_profiling import st_profile_report
 
 
12
  from groq import Groq
13
  from langchain_community.vectorstores import FAISS
14
- from langchain.text_splitter import RecursiveCharacterTextSplitter
15
- from langchain_huggingface import HuggingFaceEmbeddings
16
  from langchain_community.document_loaders import TextLoader
17
- import os
 
 
 
 
18
  import tempfile
19
 
20
- # Initialize clients
 
 
 
 
 
 
21
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
22
- embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
23
 
24
- # Set page config
25
- st.set_page_config(page_title="Neural-Vision Enhanced", layout="wide")
26
 
27
- # Custom CSS for Responsive Silver-Blue-Gold Theme with Top Nav
28
  st.markdown("""
29
  <style>
30
  :root {
@@ -36,7 +41,7 @@ st.markdown("""
36
  .stApp {
37
  background-color: var(--silver);
38
  font-family: 'Inter', sans-serif;
39
- max-width: 1200px;
40
  margin: 0 auto;
41
  padding: 10px;
42
  }
@@ -45,16 +50,16 @@ st.markdown("""
45
  color: white;
46
  padding: 15px;
47
  border-radius: 5px;
48
- text-align: center;
49
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 
50
  }
51
  .header-title {
52
- font-size: 1.8rem;
53
  font-weight: 700;
54
  margin: 0;
55
  }
56
  .header-subtitle {
57
- font-size: 1rem;
58
  margin-top: 5px;
59
  }
60
  .nav-bar {
@@ -78,38 +83,51 @@ st.markdown("""
78
  background-color: var(--gold);
79
  color: white;
80
  }
81
- .card {
82
- background-color: white;
83
- border-radius: 5px;
84
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
85
- padding: 20px;
86
- margin-bottom: 20px;
87
- }
88
  .chat-container {
89
  background-color: white;
90
  border-radius: 5px;
 
91
  padding: 15px;
92
  margin-top: 20px;
93
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
94
  }
95
  .user-message {
96
  background-color: var(--blue);
97
  color: white;
98
- border-radius: 15px 15px 5px 15px;
99
- padding: 10px;
100
- max-width: 80%;
101
  margin-left: auto;
 
102
  margin-bottom: 10px;
103
  }
104
  .bot-message {
105
  background-color: #F0F0F0;
106
  color: var(--text-color);
107
- border-radius: 15px 15px 15px 5px;
108
- padding: 10px;
109
- max-width: 80%;
110
  margin-right: auto;
 
111
  margin-bottom: 10px;
112
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  .stButton > button {
114
  background-color: var(--gold);
115
  color: white;
@@ -123,10 +141,10 @@ st.markdown("""
123
  }
124
  @media (max-width: 768px) {
125
  .header-title {
126
- font-size: 1.4rem;
127
  }
128
  .header-subtitle {
129
- font-size: 0.9rem;
130
  }
131
  .nav-bar {
132
  flex-direction: column;
@@ -137,62 +155,43 @@ st.markdown("""
137
  width: 100%;
138
  text-align: center;
139
  }
140
- .card, .chat-container {
141
  padding: 10px;
142
  }
143
  .stApp {
144
  padding: 5px;
145
  }
 
 
 
146
  }
147
- # Footer
148
- <footer style='text-align: center; padding: 10px; background-color: var(--blue); color: white; border-radius: 5px; margin-top: 20px;'>
149
- <p>Created by Calvin Allen-Crawford</p>
150
- </footer>
151
  """, unsafe_allow_html=True)
152
 
153
- # Session State Initialization
154
- if 'metrics' not in st.session_state:
155
- st.session_state.metrics = {}
156
- if 'chat_history' not in st.session_state:
157
- st.session_state.chat_history = []
158
- if 'vector_store' not in st.session_state:
159
- st.session_state.vector_store = None
160
- if 'custom_layers' not in st.session_state:
161
- st.session_state.custom_layers = []
162
- if 'prebuilt_selection' not in st.session_state:
163
- st.session_state.prebuilt_selection = None
164
- if 'model_config' not in st.session_state:
165
- st.session_state.model_config = {}
166
- if 'model_builder_mode' not in st.session_state:
167
- st.session_state.model_builder_mode = "prebuilt"
168
- if 'custom_model_type' not in st.session_state:
169
- st.session_state.custom_model_type = "classification"
170
 
171
- # Prebuilt Models
172
- PREBUILT_MODELS = {
173
- "Legal Document Classifier": {
174
- "description": "Optimized for legal document classification.",
175
- "architecture": {"type": "classification", "hidden_layers": [(128, "relu"), (64, "relu")], "dropout": 0.3, "optimizer": "adam", "learning_rate": 0.001},
176
- "domain": "Legal"
177
- },
178
- "Financial Fraud Detector": {
179
- "description": "Detects anomalies in financial transactions.",
180
- "architecture": {"type": "classification", "hidden_layers": [(256, "relu"), (128, "relu"), (64, "relu")], "dropout": 0.4, "optimizer": "adam", "learning_rate": 0.0005},
181
- "domain": "Financial"
182
- },
183
- "Customer Segmentation Engine": {
184
- "description": "Advanced customer segmentation.",
185
- "architecture": {"type": "clustering", "n_clusters": 5, "algorithm": "kmeans", "init": "k-means++", "n_init": 10},
186
- "domain": "Marketing"
187
- }
188
- }
189
 
190
- # Helper Functions (unchanged)
191
  def convert_df_to_text(df):
192
  text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
193
  text += f"Missing Values: {df.isna().sum().sum()}\n"
 
194
  for col in df.columns:
195
- text += f"- {col} ({df[col].dtype}): Mean={df[col].mean():.2f if pd.api.types.is_numeric_dtype(df[col]) else 'N/A'}\n"
 
 
 
 
 
196
  return text
197
 
198
  def create_vector_store(df_text):
@@ -201,122 +200,35 @@ def create_vector_store(df_text):
201
  temp_path = temp_file.name
202
  loader = TextLoader(temp_path)
203
  documents = loader.load()
204
- texts = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100).split_documents(documents)
 
205
  vector_store = FAISS.from_documents(texts, embeddings)
206
  os.unlink(temp_path)
207
  return vector_store
208
 
209
- def get_groq_response(prompt, mode):
210
- context = ""
211
- if st.session_state.vector_store:
212
- docs = st.session_state.vector_store.similarity_search(prompt, k=3)
213
- context += "\nDataset Context:\n" + "\n".join([f"- {doc.page_content}" for doc in docs])
214
- try:
215
- response = client.chat.completions.create(
216
- model="llama3-70b-8192",
217
- messages=[
218
- {"role": "system", "content": f"You are an expert in {mode} data analysis.\n{context}"},
219
- {"role": "user", "content": prompt}
220
- ]
221
- ).choices[0].message.content
222
- return response
223
- except Exception as e:
224
- return f"Error: {str(e)}"
225
-
226
- def build_model_from_config(config, X, y=None):
227
- problem_type = config.get("type", "classification")
228
- if problem_type == "clustering":
229
- return KMeans(n_clusters=config.get("n_clusters", 3), init=config.get("init", "k-means++"), n_init=config.get("n_init", 10), random_state=42)
230
- hidden_layers = config.get("hidden_layers", [(100, "relu")])
231
- layer_sizes = [size for size, _ in hidden_layers]
232
- activation = hidden_layers[0][1] if hidden_layers else "relu"
233
- if problem_type == "classification":
234
- return MLPClassifier(hidden_layer_sizes=layer_sizes, activation=activation, solver=config.get("optimizer", "adam"), learning_rate_init=config.get("learning_rate", 0.001), random_state=42)
235
- return MLPRegressor(hidden_layer_sizes=layer_sizes, activation=activation, solver=config.get("optimizer", "adam"), learning_rate_init=config.get("learning_rate", 0.001), random_state=42)
236
-
237
- # Main Application
238
- def main():
239
- st.markdown('<div class="header"><h1 class="header-title">Neural-Vision Enhanced</h1><p class="header-subtitle">Build & Train Neural Networks</p></div>', unsafe_allow_html=True)
240
-
241
- # Top Navigation Bar
242
- st.markdown('<div class="nav-bar">', unsafe_allow_html=True)
243
- col1, col2, col3 = st.columns([1, 2, 1])
244
- with col1:
245
- st.markdown('<div class="nav-item">Data Input</div>', unsafe_allow_html=True)
246
- uploaded_file = st.file_uploader("Upload CSV Dataset", type=["csv"])
247
- if uploaded_file:
248
- df = pd.read_csv(uploaded_file)
249
- st.session_state.vector_store = create_vector_store(convert_df_to_text(df))
250
- st.success("Dataset uploaded!")
251
- with col2:
252
- st.markdown('<div class="nav-item">Navigation</div>', unsafe_allow_html=True)
253
- nav_option = st.selectbox("Navigate", ["Model Builder", "Chat", "Train Model"], label_visibility="collapsed")
254
- with col3:
255
- st.markdown('<div class="nav-item">Info</div>', unsafe_allow_html=True)
256
- st.write("Built with Streamlit & Groq")
257
- st.markdown('</div>', unsafe_allow_html=True)
258
-
259
- # Main Content
260
- if nav_option == "Model Builder":
261
- st.markdown('<div class="card"><h2>Model Builder</h2></div>', unsafe_allow_html=True)
262
- mode = st.selectbox("Domain", ["Legal", "Financial", "Marketing"])
263
- model_builder_mode = st.radio("Mode", ["Prebuilt", "Custom"])
264
- st.session_state.model_builder_mode = "prebuilt" if model_builder_mode == "Prebuilt" else "custom"
265
-
266
- if st.session_state.model_builder_mode == "prebuilt":
267
- for name, details in PREBUILT_MODELS.items():
268
- if st.button(f"{name}: {details['description']}", key=name):
269
- st.session_state.prebuilt_selection = name
270
- st.session_state.model_config = details["architecture"]
271
- if st.session_state.prebuilt_selection:
272
- st.json(st.session_state.model_config)
273
- else:
274
- st.session_state.custom_model_type = st.selectbox("Type", ["classification", "regression", "clustering"])
275
- if st.session_state.custom_model_type != "clustering":
276
- layer_count = st.number_input("Layers", min_value=1, value=1)
277
- st.session_state.custom_layers = []
278
- for i in range(int(layer_count)):
279
- size = st.number_input(f"Layer {i+1} Size", min_value=1, value=100, key=f"size_{i}")
280
- activation = st.selectbox(f"Layer {i+1} Activation", ["relu", "tanh"], key=f"act_{i}")
281
- st.session_state.custom_layers.append((size, activation))
282
- optimizer = st.selectbox("Optimizer", ["adam", "sgd"])
283
- st.session_state.model_config = {"type": st.session_state.custom_model_type, "hidden_layers": st.session_state.custom_layers, "optimizer": optimizer, "learning_rate": 0.001}
284
- else:
285
- st.session_state.model_config = {"type": "clustering", "n_clusters": st.number_input("Clusters", min_value=2, value=3)}
286
- if st.button("Finalize"): st.json(st.session_state.model_config)
287
-
288
- elif nav_option == "Chat":
289
- st.markdown('<div class="chat-container"><h3>Chat with Grok</h3></div>', unsafe_allow_html=True)
290
- mode = st.selectbox("Domain", ["Legal", "Financial", "Marketing"])
291
- prompt = st.text_input("Ask a question:")
292
- if prompt:
293
- response = get_groq_response(prompt, mode)
294
- st.session_state.chat_history.append({"role": "user", "content": prompt})
295
- st.session_state.chat_history.append({"role": "bot", "content": response})
296
- for msg in st.session_state.chat_history:
297
- st.markdown(f'<div class={"user-message" if msg["role"] == "user" else "bot-message"}>{msg["content"]}</div>', unsafe_allow_html=True)
298
-
299
- elif nav_option == "Train Model":
300
- if uploaded_file and st.session_state.model_config:
301
- st.markdown('<div class="card"><h2>Train Model</h2></div>', unsafe_allow_html=True)
302
- df = pd.read_csv(uploaded_file)
303
- X = df.drop(columns=[df.columns[-1]]) if st.session_state.model_config["type"] != "clustering" else df
304
- y = df[df.columns[-1]] if st.session_state.model_config["type"] != "clustering" else None
305
- if st.button("Train"):
306
- scaler = StandardScaler()
307
- X_scaled = scaler.fit_transform(X)
308
- model = build_model_from_config(st.session_state.model_config, X_scaled, y)
309
- if st.session_state.model_config["type"] != "clustering":
310
- X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
311
- model.fit(X_train, y_train)
312
- y_pred = model.predict(X_test)
313
- st.session_state.metrics = {"accuracy" if st.session_state.model_config["type"] == "classification" else "r2_score": accuracy_score(y_test, y_pred) if st.session_state.model_config["type"] == "classification" else r2_score(y_test, y_pred)}
314
- else:
315
- model.fit(X_scaled)
316
- st.session_state.metrics = {"silhouette_score": silhouette_score(X_scaled, model.labels_)}
317
- st.json(st.session_state.metrics)
318
- else:
319
- st.warning("Upload a dataset and configure a model first!")
320
-
321
- if __name__ == "__main__":
322
- main()
 
1
  import streamlit as st
2
  import pandas as pd
 
3
  import numpy as np
4
+ import plotly.express as px
5
+ import plotly.graph_objects as go
 
 
 
6
  from ydata_profiling import ProfileReport
7
  from streamlit_pandas_profiling import st_profile_report
8
+ import os
9
+ from dotenv import load_dotenv
10
  from groq import Groq
11
  from langchain_community.vectorstores import FAISS
 
 
12
  from langchain_community.document_loaders import TextLoader
13
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
14
+ from langchain.embeddings import HuggingFaceEmbeddings
15
+ import re
16
+ from scipy import stats
17
+ from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
18
  import tempfile
19
 
20
+ # Set page config as the first Streamlit command
21
+ st.set_page_config(page_title="Data-Vision Pro", layout="wide")
22
+
23
+ # Load environment variables
24
+ load_dotenv()
25
+
26
+ # Initialize Groq client
27
  client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 
28
 
29
+ # Initialize HuggingFace embeddings for FAISS
30
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
31
 
32
+ # Custom CSS with Silver, Blue, and Gold Theme + Top Nav
33
  st.markdown("""
34
  <style>
35
  :root {
 
41
  .stApp {
42
  background-color: var(--silver);
43
  font-family: 'Inter', sans-serif;
44
+ max-width: 900px;
45
  margin: 0 auto;
46
  padding: 10px;
47
  }
 
50
  color: white;
51
  padding: 15px;
52
  border-radius: 5px;
 
53
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
54
+ text-align: center;
55
  }
56
  .header-title {
57
+ font-size: 1.5rem;
58
  font-weight: 700;
59
  margin: 0;
60
  }
61
  .header-subtitle {
62
+ font-size: 0.9rem;
63
  margin-top: 5px;
64
  }
65
  .nav-bar {
 
83
  background-color: var(--gold);
84
  color: white;
85
  }
 
 
 
 
 
 
 
86
  .chat-container {
87
  background-color: white;
88
  border-radius: 5px;
89
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
90
  padding: 15px;
91
  margin-top: 20px;
 
92
  }
93
  .user-message {
94
  background-color: var(--blue);
95
  color: white;
96
+ border-radius: 18px 18px 4px 18px;
97
+ padding: 12px 16px;
 
98
  margin-left: auto;
99
+ max-width: 80%;
100
  margin-bottom: 10px;
101
  }
102
  .bot-message {
103
  background-color: #F0F0F0;
104
  color: var(--text-color);
105
+ border-radius: 18px 18px 18px 4px;
106
+ padding: 12px 16px;
 
107
  margin-right: auto;
108
+ max-width: 80%;
109
  margin-bottom: 10px;
110
  }
111
+ .footer {
112
+ text-align: center;
113
+ margin-top: 20px;
114
+ color: var(--text-color);
115
+ font-size: 0.8rem;
116
+ }
117
+ .tech-badge {
118
+ display: inline-block;
119
+ background-color: #E6ECEF;
120
+ color: var(--blue);
121
+ padding: 4px 8px;
122
+ border-radius: 12px;
123
+ font-size: 0.7rem;
124
+ margin: 0 4px;
125
+ }
126
+ h2 {
127
+ color: var(--blue);
128
+ border-bottom: 2px solid var(--gold);
129
+ padding-bottom: 5px;
130
+ }
131
  .stButton > button {
132
  background-color: var(--gold);
133
  color: white;
 
141
  }
142
  @media (max-width: 768px) {
143
  .header-title {
144
+ font-size: 1.2rem;
145
  }
146
  .header-subtitle {
147
+ font-size: 0.8rem;
148
  }
149
  .nav-bar {
150
  flex-direction: column;
 
155
  width: 100%;
156
  text-align: center;
157
  }
158
+ .chat-container {
159
  padding: 10px;
160
  }
161
  .stApp {
162
  padding: 5px;
163
  }
164
+ h2 {
165
+ font-size: 1.2rem;
166
+ }
167
  }
168
+ </style>
 
 
 
169
  """, unsafe_allow_html=True)
170
 
171
+ # Helper Functions
172
def enhance_section_title(title):
    """Render *title* as a styled ``<h2>`` section heading.

    The heading is emitted through ``st.markdown`` with
    ``unsafe_allow_html=True`` and carries an inline style (gold bottom
    border, blue text, 5px bottom padding).

    Args:
        title: Text placed inside the ``<h2>`` element. It is interpolated
            into the HTML as-is, without escaping.
    """
    # NOTE(review): presumably callers pass trusted, hard-coded titles;
    # confirm before passing user-supplied text, since nothing is escaped.
    heading_style = "border-bottom: 2px solid var(--gold); padding-bottom: 5px; color: var(--blue);"
    heading_html = f"<h2 style='{heading_style}'>{title}</h2>"
    st.markdown(heading_html, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
def update_cleaned_data(df):
    """Record *df* as the current cleaned dataset and rerun the app.

    Steps, in order:
      1. Store ``df`` in ``st.session_state.cleaned_data``.
      2. On first use, start ``st.session_state.data_versions`` with a copy
         of ``st.session_state.raw_data``.
      3. Append a copy of ``df`` to ``data_versions``.
      4. Refresh ``st.session_state.dataset_text`` via ``convert_df_to_text``.
      5. Show a success message and call ``st.rerun()``.

    Args:
        df: The dataframe to store; must support ``.copy()``.
    """
    state = st.session_state
    state.cleaned_data = df

    # Seed the history lazily; this reads ``raw_data`` from session state.
    # NOTE(review): assumes ``raw_data`` was set earlier - confirm in callers.
    if "data_versions" not in state:
        state.data_versions = [state.raw_data.copy()]
    state.data_versions.append(df.copy())

    state.dataset_text = convert_df_to_text(df)

    # NOTE(review): the rerun directly follows the success message, so the
    # message may not stay visible - confirm this is intended.
    st.success("✅ Action completed successfully!")
    st.rerun()
 
 
 
 
 
 
 
 
 
 
183
 
 
184
  def convert_df_to_text(df):
185
  text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
186
  text += f"Missing Values: {df.isna().sum().sum()}\n"
187
+ text += "Columns:\n"
188
  for col in df.columns:
189
+ if pd.api.types.is_numeric_dtype(df[col]):
190
+ mean_value = f"{df[col].mean():.2f}"
191
+ else:
192
+ mean_value = "N/A"
193
+ text += f"- {col} ({df[col].dtype}): Mean={mean_value}, Min={df[col].min()}, Max={df[col].max()}" if pd.api.types.is_numeric_dtype(df[col]) else f"- {col} ({df[col].dtype}): Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
194
+ text += f", Missing={df[col].isna().sum()}\n"
195
  return text
196
 
197
  def create_vector_store(df_text):
 
200
  temp_path = temp_file.name
201
  loader = TextLoader(temp_path)
202
  documents = loader.load()
203
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
204
+ texts = text_splitter.split_documents(documents)
205
  vector_store = FAISS.from_documents(texts, embeddings)
206
  os.unlink(temp_path)
207
  return vector_store
208
 
209
def update_vector_store_with_plot(plot_text, existing_vector_store):
    """Index *plot_text* into a FAISS vector store and return the store.

    The text is written to a temporary ``.txt`` file, loaded with
    ``TextLoader`` and split into 500-character chunks with a 100-character
    overlap. The chunks are added to ``existing_vector_store`` when it is
    truthy; otherwise a new FAISS store is built from them using the
    module-level ``embeddings``.

    Args:
        plot_text: Text description of a plot to index.
        existing_vector_store: A vector store to extend, or a falsy value
            (e.g. ``None``) to create a new one.

    Returns:
        The extended store, or the newly created one.
    """
    # delete=False so the file survives the ``with`` block and can be
    # reopened by path; it is removed explicitly below.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
        temp_file.write(plot_text)
        temp_path = temp_file.name
    try:
        documents = TextLoader(temp_path).load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)
        if existing_vector_store:
            existing_vector_store.add_documents(texts)
        else:
            existing_vector_store = FAISS.from_documents(texts, embeddings)
    finally:
        # Remove the temp file even if loading, splitting or embedding fails.
        os.unlink(temp_path)
    return existing_vector_store
223
+
224
+ def extract_plot_data(plot_info, df):
225
+ plot_type = plot_info["type"]
226
+ x_col = plot_info["x"]
227
+ y_col = plot_info["y"] if "y" in plot_info else None
228
+ data = pd.read_json(plot_info["data"])
229
+ plot_text = f"Plot Type: {plot_type}\n"
230
+ plot_text += f"X-Axis: {x_col}\n"
231
+ if y_col:
232
+ plot_text += f"Y-Axis: {y_col}\n"
233
+ if plot_type == "Scatter Plot" and y_col:
234
+ correlation = data[x