Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -23,13 +23,11 @@ st.set_page_config(page_title="Data-Vision Pro", layout="wide", initial_sidebar_
|
|
23 |
# Load environment variables
|
24 |
load_dotenv()
|
25 |
|
26 |
-
# Initialize Groq client
|
27 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
28 |
-
|
29 |
-
# Initialize HuggingFace embeddings
|
30 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
31 |
|
32 |
-
# Custom CSS
|
33 |
st.markdown("""
|
34 |
<style>
|
35 |
:root {
|
@@ -37,22 +35,25 @@ st.markdown("""
|
|
37 |
--blue: #5C89BC;
|
38 |
--gold: #A87E01;
|
39 |
--text-color: #333333;
|
|
|
40 |
}
|
41 |
.stApp {
|
42 |
background-color: var(--silver);
|
43 |
font-family: 'Inter', sans-serif;
|
44 |
-
padding:
|
45 |
height: 100vh;
|
46 |
-
width: 100vw;
|
47 |
overflow-y: auto;
|
|
|
|
|
48 |
}
|
49 |
.header {
|
50 |
background-color: var(--blue);
|
51 |
color: white;
|
52 |
-
padding:
|
53 |
border-radius: 8px;
|
54 |
text-align: center;
|
55 |
-
margin-bottom:
|
|
|
56 |
}
|
57 |
.header-title {
|
58 |
font-size: 2rem;
|
@@ -62,26 +63,27 @@ st.markdown("""
|
|
62 |
.header-subtitle {
|
63 |
font-size: 1rem;
|
64 |
margin-top: 0.5rem;
|
|
|
65 |
}
|
66 |
.nav-bar {
|
67 |
background-color: white;
|
68 |
border-radius: 8px;
|
69 |
padding: 1rem;
|
70 |
display: flex;
|
71 |
-
|
72 |
align-items: center;
|
73 |
flex-wrap: wrap;
|
74 |
-
|
75 |
-
|
76 |
}
|
77 |
.nav-item {
|
78 |
color: var(--blue);
|
79 |
font-weight: 500;
|
80 |
-
|
81 |
-
padding: 0.5rem 1rem;
|
82 |
border-radius: 5px;
|
83 |
-
flex: 1;
|
84 |
text-align: center;
|
|
|
|
|
85 |
}
|
86 |
.nav-item:hover {
|
87 |
background-color: var(--gold);
|
@@ -90,69 +92,93 @@ st.markdown("""
|
|
90 |
.main-container {
|
91 |
background-color: white;
|
92 |
border-radius: 8px;
|
93 |
-
padding:
|
94 |
-
|
95 |
-
margin-bottom:
|
|
|
96 |
}
|
97 |
.chat-container {
|
98 |
background-color: white;
|
99 |
-
border-radius: 8px;
|
100 |
-
padding:
|
101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
}
|
103 |
.user-message, .bot-message {
|
104 |
-
padding: 1rem;
|
105 |
border-radius: 12px;
|
106 |
margin-bottom: 0.5rem;
|
107 |
-
max-width:
|
|
|
108 |
}
|
109 |
.user-message {
|
110 |
background-color: var(--blue);
|
111 |
color: white;
|
112 |
margin-left: auto;
|
|
|
113 |
}
|
114 |
.bot-message {
|
115 |
background-color: #F0F0F0;
|
116 |
color: var(--text-color);
|
117 |
margin-right: auto;
|
|
|
118 |
}
|
119 |
.footer {
|
120 |
text-align: center;
|
121 |
color: var(--text-color);
|
122 |
font-size: 0.9rem;
|
123 |
padding: 1rem 0;
|
|
|
124 |
}
|
125 |
h2 {
|
126 |
color: var(--blue);
|
127 |
border-bottom: 2px solid var(--gold);
|
128 |
padding-bottom: 0.5rem;
|
129 |
font-size: 1.5rem;
|
|
|
130 |
}
|
131 |
.stButton > button {
|
132 |
background-color: var(--gold);
|
133 |
color: white;
|
134 |
border-radius: 5px;
|
135 |
-
padding: 0.5rem
|
|
|
|
|
136 |
}
|
137 |
.stButton > button:hover {
|
138 |
background-color: #8C6B01;
|
139 |
}
|
|
|
|
|
|
|
|
|
140 |
@media (max-width: 768px) {
|
141 |
.header-title { font-size: 1.5rem; }
|
142 |
.header-subtitle { font-size: 0.9rem; }
|
143 |
-
.nav-bar { flex-direction: column; padding: 0.5rem; }
|
144 |
-
.nav-item {
|
145 |
-
.main-container
|
|
|
146 |
h2 { font-size: 1.2rem; }
|
147 |
}
|
148 |
@media (max-width: 480px) {
|
149 |
.header-title { font-size: 1.2rem; }
|
150 |
-
.stApp { padding: 0.
|
|
|
151 |
}
|
152 |
</style>
|
153 |
""", unsafe_allow_html=True)
|
154 |
|
155 |
-
# Helper Functions (
|
156 |
def enhance_section_title(title):
|
157 |
st.markdown(f"<h2>{title}</h2>", unsafe_allow_html=True)
|
158 |
|
@@ -162,41 +188,126 @@ def update_cleaned_data(df):
|
|
162 |
st.session_state.data_versions = [st.session_state.raw_data.copy()]
|
163 |
st.session_state.data_versions.append(df.copy())
|
164 |
st.session_state.dataset_text = convert_df_to_text(df)
|
|
|
165 |
st.success("✅ Action completed!")
|
166 |
st.rerun()
|
167 |
|
168 |
def convert_df_to_text(df):
|
169 |
-
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
def create_vector_store(df_text):
|
173 |
-
|
174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
|
176 |
def update_vector_store_with_plot(plot_text, existing_vector_store):
|
177 |
-
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
def extract_plot_data(plot_info, df):
|
181 |
-
|
182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
|
185 |
-
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
|
195 |
# Dataset Preview
|
196 |
def display_dataset_preview():
|
197 |
if 'cleaned_data' in st.session_state:
|
198 |
st.subheader("Dataset Preview")
|
199 |
-
st.dataframe(st.session_state.cleaned_data.head(), use_container_width=True)
|
200 |
|
201 |
# Main App
|
202 |
def main():
|
@@ -209,90 +320,119 @@ def main():
|
|
209 |
""", unsafe_allow_html=True)
|
210 |
|
211 |
# Navigation Bar
|
212 |
-
st.
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
|
|
225 |
|
226 |
# Initialize Session State
|
227 |
if 'vector_store' not in st.session_state:
|
228 |
st.session_state.vector_store = None
|
229 |
if 'chat_history' not in st.session_state:
|
230 |
st.session_state.chat_history = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
231 |
|
232 |
# Main Content
|
233 |
-
st.
|
234 |
-
|
|
|
235 |
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
update_cleaned_data(new_df)
|
267 |
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
|
282 |
-
|
283 |
|
284 |
# Chatbot
|
285 |
-
st.
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
st.
|
290 |
-
|
291 |
-
st.
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
|
297 |
# Footer
|
298 |
st.markdown('<div class="footer">Built with Streamlit & Groq</div>', unsafe_allow_html=True)
|
|
|
23 |
# Load environment variables
|
24 |
load_dotenv()
|
25 |
|
26 |
+
# Initialize Groq client and embeddings
|
27 |
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
|
|
|
|
|
28 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
29 |
|
30 |
+
# Custom CSS with improved spacing, sizing, and UI/UX
|
31 |
st.markdown("""
|
32 |
<style>
|
33 |
:root {
|
|
|
35 |
--blue: #5C89BC;
|
36 |
--gold: #A87E01;
|
37 |
--text-color: #333333;
|
38 |
+
--spacing-unit: 1.5rem;
|
39 |
}
|
40 |
.stApp {
|
41 |
background-color: var(--silver);
|
42 |
font-family: 'Inter', sans-serif;
|
43 |
+
padding: var(--spacing-unit);
|
44 |
height: 100vh;
|
|
|
45 |
overflow-y: auto;
|
46 |
+
display: flex;
|
47 |
+
flex-direction: column;
|
48 |
}
|
49 |
.header {
|
50 |
background-color: var(--blue);
|
51 |
color: white;
|
52 |
+
padding: var(--spacing-unit);
|
53 |
border-radius: 8px;
|
54 |
text-align: center;
|
55 |
+
margin-bottom: var(--spacing-unit);
|
56 |
+
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
|
57 |
}
|
58 |
.header-title {
|
59 |
font-size: 2rem;
|
|
|
63 |
.header-subtitle {
|
64 |
font-size: 1rem;
|
65 |
margin-top: 0.5rem;
|
66 |
+
opacity: 0.9;
|
67 |
}
|
68 |
.nav-bar {
|
69 |
background-color: white;
|
70 |
border-radius: 8px;
|
71 |
padding: 1rem;
|
72 |
display: flex;
|
73 |
+
gap: 1rem;
|
74 |
align-items: center;
|
75 |
flex-wrap: wrap;
|
76 |
+
margin-bottom: var(--spacing-unit);
|
77 |
+
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
78 |
}
|
79 |
.nav-item {
|
80 |
color: var(--blue);
|
81 |
font-weight: 500;
|
82 |
+
padding: 0.75rem 1.5rem;
|
|
|
83 |
border-radius: 5px;
|
|
|
84 |
text-align: center;
|
85 |
+
transition: all 0.2s ease;
|
86 |
+
flex: 1;
|
87 |
}
|
88 |
.nav-item:hover {
|
89 |
background-color: var(--gold);
|
|
|
92 |
.main-container {
|
93 |
background-color: white;
|
94 |
border-radius: 8px;
|
95 |
+
padding: var(--spacing-unit);
|
96 |
+
flex-grow: 1;
|
97 |
+
margin-bottom: var(--spacing-unit);
|
98 |
+
box-shadow: 0 2px 5px rgba(0,0,0,0.05);
|
99 |
}
|
100 |
.chat-container {
|
101 |
background-color: white;
|
102 |
+
border-radius: 8px 8px 0 0;
|
103 |
+
padding: 1rem;
|
104 |
+
position: fixed;
|
105 |
+
bottom: 0;
|
106 |
+
left: var(--spacing-unit);
|
107 |
+
right: var(--spacing-unit);
|
108 |
+
max-height: 40vh;
|
109 |
+
overflow-y: auto;
|
110 |
+
box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
|
111 |
+
z-index: 1000;
|
112 |
+
}
|
113 |
+
.chat-message-container {
|
114 |
+
margin-bottom: 0.5rem;
|
115 |
}
|
116 |
.user-message, .bot-message {
|
117 |
+
padding: 0.75rem 1rem;
|
118 |
border-radius: 12px;
|
119 |
margin-bottom: 0.5rem;
|
120 |
+
max-width: 70%;
|
121 |
+
word-wrap: break-word;
|
122 |
}
|
123 |
.user-message {
|
124 |
background-color: var(--blue);
|
125 |
color: white;
|
126 |
margin-left: auto;
|
127 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
|
128 |
}
|
129 |
.bot-message {
|
130 |
background-color: #F0F0F0;
|
131 |
color: var(--text-color);
|
132 |
margin-right: auto;
|
133 |
+
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
|
134 |
}
|
135 |
.footer {
|
136 |
text-align: center;
|
137 |
color: var(--text-color);
|
138 |
font-size: 0.9rem;
|
139 |
padding: 1rem 0;
|
140 |
+
margin-top: auto;
|
141 |
}
|
142 |
h2 {
|
143 |
color: var(--blue);
|
144 |
border-bottom: 2px solid var(--gold);
|
145 |
padding-bottom: 0.5rem;
|
146 |
font-size: 1.5rem;
|
147 |
+
margin-bottom: 1rem;
|
148 |
}
|
149 |
.stButton > button {
|
150 |
background-color: var(--gold);
|
151 |
color: white;
|
152 |
border-radius: 5px;
|
153 |
+
padding: 0.75rem 1.5rem;
|
154 |
+
font-weight: 500;
|
155 |
+
transition: background-color 0.2s ease;
|
156 |
}
|
157 |
.stButton > button:hover {
|
158 |
background-color: #8C6B01;
|
159 |
}
|
160 |
+
.stTextInput > div > div > input {
|
161 |
+
border-radius: 5px;
|
162 |
+
padding: 0.75rem;
|
163 |
+
}
|
164 |
@media (max-width: 768px) {
|
165 |
.header-title { font-size: 1.5rem; }
|
166 |
.header-subtitle { font-size: 0.9rem; }
|
167 |
+
.nav-bar { flex-direction: column; padding: 0.75rem; gap: 0.5rem; }
|
168 |
+
.nav-item { padding: 0.5rem; }
|
169 |
+
.main-container { padding: 1rem; }
|
170 |
+
.chat-container { padding: 0.75rem; max-height: 50vh; }
|
171 |
h2 { font-size: 1.2rem; }
|
172 |
}
|
173 |
@media (max-width: 480px) {
|
174 |
.header-title { font-size: 1.2rem; }
|
175 |
+
.stApp { padding: 0.75rem; }
|
176 |
+
.chat-container { left: 0.75rem; right: 0.75rem; }
|
177 |
}
|
178 |
</style>
|
179 |
""", unsafe_allow_html=True)
|
180 |
|
181 |
+
# Helper Functions (fully implemented from original intent)
|
182 |
def enhance_section_title(title):
    """Render ``title`` as an ``<h2>`` heading.

    The heading is emitted through ``st.markdown`` with raw HTML enabled so
    the ``h2`` rules from the page's custom CSS apply to it.

    Args:
        title: Text placed inside the ``<h2>`` element (inserted verbatim).
    """
    heading_html = f"<h2>{title}</h2>"
    st.markdown(heading_html, unsafe_allow_html=True)
|
184 |
|
|
|
188 |
st.session_state.data_versions = [st.session_state.raw_data.copy()]
|
189 |
st.session_state.data_versions.append(df.copy())
|
190 |
st.session_state.dataset_text = convert_df_to_text(df)
|
191 |
+
st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
|
192 |
st.success("✅ Action completed!")
|
193 |
st.rerun()
|
194 |
|
195 |
def convert_df_to_text(df):
|
196 |
+
text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
|
197 |
+
text += f"Missing Values: {df.isna().sum().sum()}\nColumns:\n"
|
198 |
+
for col in df.columns:
|
199 |
+
text += f"- {col} ({df[col].dtype}): "
|
200 |
+
if pd.api.types.is_numeric_dtype(df[col]):
|
201 |
+
text += f"Mean={df[col].mean():.2f}, Min={df[col].min()}, Max={df[col].max()}"
|
202 |
+
else:
|
203 |
+
text += f"Unique={df[col].nunique()}, Top={df[col].mode()[0] if not df[col].mode().empty else 'N/A'}"
|
204 |
+
text += f", Missing={df[col].isna().sum()}\n"
|
205 |
+
return text
|
206 |
|
207 |
def create_vector_store(df_text):
    """Build a FAISS vector store from a block of text.

    The text is written to a temporary ``.txt`` file because ``TextLoader``
    reads from a path. The loaded document is split into 1000-character chunks
    with 200 characters of overlap, embedded with the module-level
    ``embeddings`` model and indexed with FAISS.

    Args:
        df_text: The text to index.

    Returns:
        The FAISS vector store built from the chunked text.
    """
    # delete=False so the file can be reopened by path after the handle is
    # closed; UTF-8 is explicit so writing and reading agree on every platform.
    with tempfile.NamedTemporaryFile(
        mode='w', suffix='.txt', delete=False, encoding='utf-8'
    ) as temp_file:
        temp_file.write(df_text)
        temp_path = temp_file.name
    try:
        documents = TextLoader(temp_path, encoding='utf-8').load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        texts = text_splitter.split_documents(documents)
        return FAISS.from_documents(texts, embeddings)
    finally:
        # Remove the temp file even when loading or embedding fails.
        os.unlink(temp_path)
|
218 |
|
219 |
def update_vector_store_with_plot(plot_text, existing_vector_store):
    """Add a plot description to the vector store, creating one if needed.

    Args:
        plot_text: Text describing the plot.
        existing_vector_store: The store to extend; when falsy, a new FAISS
            store is built from the plot text alone.

    Returns:
        The extended store, or the newly created one.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    plot_docs = splitter.create_documents([plot_text])
    if not existing_vector_store:
        return FAISS.from_documents(plot_docs, embeddings)
    existing_vector_store.add_documents(plot_docs)
    return existing_vector_store
|
227 |
|
228 |
def extract_plot_data(plot_info, df):
    """Build the Plotly figure described by ``plot_info``.

    Args:
        plot_info: Mapping with the keys ``"type"`` (``"scatter"`` or
            ``"histogram"``, case-insensitive), ``"x"`` and, for scatter
            plots, ``"y"``.
        df: The pandas DataFrame to plot.

    Returns:
        A ``(figure, description)`` tuple on success, or ``(None, message)``
        when a referenced column is missing or the plot type is unsupported.
    """
    # A present-but-None "type" must not crash on .lower(); treat it as unknown.
    plot_type = str(plot_info.get("type") or "").lower()
    x_col = plot_info.get("x", "")
    y_col = plot_info.get("y", "")

    # Only scatter plots need a y column, so y is validated for that type only.
    needs_y = plot_type == "scatter"
    if x_col not in df.columns or (needs_y and y_col not in df.columns):
        return None, "Invalid column names."

    if plot_type == "scatter":
        return px.scatter(df, x=x_col, y=y_col), f"Scatter plot: {x_col} vs {y_col}"
    if plot_type == "histogram":
        return px.histogram(df, x=x_col), f"Histogram of {x_col}"
    return None, "Unsupported plot type."
|
241 |
|
242 |
def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
    """Ask the Groq chat model a question, optionally grounded in the dataset.

    When a vector store is supplied, the three chunks most similar to the
    question are appended to the system prompt as a bulleted "Data Context"
    section.

    Args:
        user_input: The user's question.
        app_mode: Name of the current app mode; lower-cased into the system
            prompt.
        vector_store: Optional store queried with ``similarity_search``.
        model: Groq model identifier.

    Returns:
        The content of the first completion choice.
    """
    if vector_store:
        matches = vector_store.similarity_search(user_input, k=3)
        bullet_lines = [f"- {doc.page_content}" for doc in matches]
        context = "\n\nData Context:\n" + "\n".join(bullet_lines)
    else:
        context = ""

    messages = [
        {"role": "system", "content": f"You are an expert in {app_mode.lower()} analysis.{context}"},
        {"role": "user", "content": user_input},
    ]
    completion = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
        max_tokens=1024,
    )
    return completion.choices[0].message.content
|
258 |
+
|
259 |
+
# Command Functions (basic implementations from original intent)
|
260 |
+
def drop_columns(columns):
    """Drop the named columns from the cleaned dataset and persist the result.

    Does nothing when no dataset is loaded. Names that are not columns of the
    dataset are reported with a warning instead of raising ``KeyError``; the
    remaining names are dropped and the result is handed to
    ``update_cleaned_data``.

    Args:
        columns: A column name or a list of column names to drop.
    """
    # The key can exist with a None value before any upload, so test the
    # value rather than key membership.
    df = st.session_state.get('cleaned_data')
    if df is None:
        return
    if isinstance(columns, str):
        columns = [columns]

    present = [col for col in columns if col in df.columns]
    missing = [col for col in columns if col not in df.columns]
    if missing:
        st.warning(f"Columns not found: {', '.join(map(str, missing))}")
    if present:
        update_cleaned_data(df.drop(columns=present))
|
264 |
|
265 |
+
def generate_scatter_plot(params):
    """Render a scatter plot of two dataset columns and index its description.

    Does nothing when no dataset is loaded or when either requested column is
    not in the dataset. On success the chart is shown and a one-line
    description of it is added to the session's vector store.

    Args:
        params: Mapping with the column names under ``"x"`` and ``"y"``.
    """
    # The key can exist with a None value before any upload, so test the
    # value rather than key membership.
    df = st.session_state.get('cleaned_data')
    if df is None:
        return
    x, y = params.get("x"), params.get("y")
    if x not in df.columns or y not in df.columns:
        return

    fig = px.scatter(df, x=x, y=y)
    st.plotly_chart(fig, use_container_width=True)
    plot_text = f"Scatter plot of {x} vs {y}"
    st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
|
274 |
+
|
275 |
+
def generate_histogram(params):
    """Render a histogram of one dataset column and index its description.

    Does nothing when no dataset is loaded or when the requested column is not
    in the dataset. On success the chart is shown and a one-line description
    of it is added to the session's vector store.

    Args:
        params: Mapping with the column name under ``"x"``.
    """
    # The key can exist with a None value before any upload, so test the
    # value rather than key membership.
    df = st.session_state.get('cleaned_data')
    if df is None:
        return
    x = params.get("x")
    if x not in df.columns:
        return

    fig = px.histogram(df, x=x)
    st.plotly_chart(fig, use_container_width=True)
    plot_text = f"Histogram of {x}"
    st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
|
284 |
+
|
285 |
+
def analyze_plot():
    """Placeholder for plot analysis; only shows a notice when data is loaded."""
    # The key can exist with a None value before any upload, so test the
    # value rather than key membership.
    if st.session_state.get('cleaned_data') is not None:
        st.write("Plot analysis not fully implemented yet.")
|
288 |
+
|
289 |
+
def parse_command(command):
    """Parse a chat message into a structured data command.

    Recognised forms (keywords are case-insensitive, column names keep the
    case the user typed so they can match the DataFrame's columns):

    * ``drop column(s) a, b``        -> ``{"action": "drop_columns", "columns": [...]}``
    * ``scatter plot x=<col> y=<col>`` -> ``{"action": "scatter_plot", "x": ..., "y": ...}``
    * ``histogram of <col>``         -> ``{"action": "histogram", "x": ...}``

    Each pattern is tried independently, so a column whose name merely
    contains "drop" does not prevent a scatter or histogram command from
    being recognised.

    Args:
        command: The raw chat message.

    Returns:
        The command dict, or ``None`` when the message is not a command.
    """
    text = command.strip()

    drop_match = re.search(r"\bdrop\s+columns?\s+(.+)", text, re.IGNORECASE)
    if drop_match:
        # Discard empty entries produced by stray or trailing commas.
        cols = [col.strip() for col in drop_match.group(1).split(",") if col.strip()]
        if cols:
            return {"action": "drop_columns", "columns": cols}

    scatter_match = re.search(r"scatter\s+plot\s+x=(\w+)\s+y=(\w+)", text, re.IGNORECASE)
    if scatter_match:
        return {"action": "scatter_plot", "x": scatter_match.group(1), "y": scatter_match.group(2)}

    histogram_match = re.search(r"histogram\s+of\s+(\w+)", text, re.IGNORECASE)
    if histogram_match:
        return {"action": "histogram", "x": histogram_match.group(1)}

    return None
|
305 |
|
306 |
# Dataset Preview
|
307 |
def display_dataset_preview():
    """Show the first rows of the cleaned dataset, if one is loaded."""
    # The key can exist with a None value before any upload, so test the
    # value rather than key membership; calling .head() on None would crash.
    df = st.session_state.get('cleaned_data')
    if df is None:
        return
    st.subheader("Dataset Preview")
    st.dataframe(df.head(), use_container_width=True, height=200)
|
311 |
|
312 |
# Main App
|
313 |
def main():
|
|
|
320 |
""", unsafe_allow_html=True)
|
321 |
|
322 |
# Navigation Bar
|
323 |
+
with st.container():
|
324 |
+
st.markdown('<div class="nav-bar">', unsafe_allow_html=True)
|
325 |
+
col1, col2, col3, col4 = st.columns([2, 2, 2, 1])
|
326 |
+
with col1:
|
327 |
+
uploaded_file = st.file_uploader("Upload File", type=["csv", "xlsx"], key="file_uploader")
|
328 |
+
with col2:
|
329 |
+
app_mode = st.selectbox("Mode", ["Data Upload", "Data Cleaning", "EDA"], label_visibility="collapsed")
|
330 |
+
with col3:
|
331 |
+
model = st.selectbox("Model", ["llama3-70b-8192", "llama3-8b-8192", "mixtral-8x7b-32768", "gemma-7b-it"], label_visibility="collapsed")
|
332 |
+
with col4:
|
333 |
+
if 'cleaned_data' in st.session_state:
|
334 |
+
csv = st.session_state.cleaned_data.to_csv(index=False)
|
335 |
+
st.download_button(label="Download", data=csv, file_name='cleaned_data.csv', mime='text/csv')
|
336 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
337 |
|
338 |
# Initialize Session State
|
339 |
if 'vector_store' not in st.session_state:
|
340 |
st.session_state.vector_store = None
|
341 |
if 'chat_history' not in st.session_state:
|
342 |
st.session_state.chat_history = []
|
343 |
+
if 'raw_data' not in st.session_state:
|
344 |
+
st.session_state.raw_data = None
|
345 |
+
if 'cleaned_data' not in st.session_state:
|
346 |
+
st.session_state.cleaned_data = None
|
347 |
+
if 'data_versions' not in st.session_state:
|
348 |
+
st.session_state.data_versions = []
|
349 |
+
if 'dataset_text' not in st.session_state:
|
350 |
+
st.session_state.dataset_text = ""
|
351 |
|
352 |
# Main Content
|
353 |
+
with st.container():
|
354 |
+
st.markdown('<div class="main-container">', unsafe_allow_html=True)
|
355 |
+
display_dataset_preview()
|
356 |
|
357 |
+
if app_mode == "Data Upload":
|
358 |
+
enhance_section_title("📤 Data Upload")
|
359 |
+
if uploaded_file:
|
360 |
+
try:
|
361 |
+
df = pd.read_csv(uploaded_file) if uploaded_file.name.endswith('.csv') else pd.read_excel(uploaded_file)
|
362 |
+
st.session_state.raw_data = df
|
363 |
+
st.session_state.cleaned_data = df.copy()
|
364 |
+
st.session_state.dataset_text = convert_df_to_text(df)
|
365 |
+
st.session_state.vector_store = create_vector_store(st.session_state.dataset_text)
|
366 |
+
st.session_state.data_versions = [df.copy()]
|
367 |
+
col1, col2, col3 = st.columns(3)
|
368 |
+
with col1: st.metric("Rows", df.shape[0])
|
369 |
+
with col2: st.metric("Columns", df.shape[1])
|
370 |
+
with col3: st.metric("Missing", df.isna().sum().sum())
|
371 |
+
if st.button("Generate Report"):
|
372 |
+
pr = ProfileReport(df, explorative=True)
|
373 |
+
st_profile_report(pr)
|
374 |
+
except Exception as e:
|
375 |
+
st.error(f"Error: {e}")
|
376 |
|
377 |
+
elif app_mode == "Data Cleaning":
|
378 |
+
enhance_section_title("🧹 Data Cleaning")
|
379 |
+
if 'cleaned_data' not in st.session_state:
|
380 |
+
st.warning("Upload data first.")
|
381 |
+
else:
|
382 |
+
df = st.session_state.cleaned_data.copy()
|
383 |
+
columns_to_drop = st.multiselect("Drop Columns", df.columns)
|
384 |
+
if st.button("Drop Selected"):
|
385 |
+
new_df = df.drop(columns=columns_to_drop)
|
386 |
+
update_cleaned_data(new_df)
|
|
|
387 |
|
388 |
+
elif app_mode == "EDA":
|
389 |
+
enhance_section_title("🔍 EDA")
|
390 |
+
if 'cleaned_data' not in st.session_state:
|
391 |
+
st.warning("Upload data first.")
|
392 |
+
else:
|
393 |
+
df = st.session_state.cleaned_data.copy()
|
394 |
+
plot_type = st.selectbox("Plot Type", ["Scatter Plot", "Histogram"])
|
395 |
+
x_axis = st.selectbox("X-axis", df.columns)
|
396 |
+
if plot_type == "Scatter Plot":
|
397 |
+
y_axis = st.selectbox("Y-axis", df.columns)
|
398 |
+
if st.button("Generate"):
|
399 |
+
fig = px.scatter(df, x=x_axis, y=y_axis)
|
400 |
+
st.plotly_chart(fig, use_container_width=True)
|
401 |
+
plot_text = f"Scatter plot of {x_axis} vs {y_axis}"
|
402 |
+
st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
|
403 |
+
else:
|
404 |
+
if st.button("Generate"):
|
405 |
+
fig = px.histogram(df, x=x_axis)
|
406 |
+
st.plotly_chart(fig, use_container_width=True)
|
407 |
+
plot_text = f"Histogram of {x_axis}"
|
408 |
+
st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
|
409 |
|
410 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
411 |
|
412 |
# Chatbot
|
413 |
+
with st.container():
|
414 |
+
st.markdown('<div class="chat-container">', unsafe_allow_html=True)
|
415 |
+
st.subheader("💬 Chatbot")
|
416 |
+
for message in st.session_state.chat_history:
|
417 |
+
with st.container():
|
418 |
+
st.markdown(f'<div class="chat-message-container"><div class="{message["role"]}-message">{message["content"]}</div></div>', unsafe_allow_html=True)
|
419 |
+
if user_input := st.chat_input("Ask anything..."):
|
420 |
+
command = parse_command(user_input)
|
421 |
+
if command:
|
422 |
+
if command["action"] == "drop_columns":
|
423 |
+
drop_columns(command["columns"])
|
424 |
+
elif command["action"] == "scatter_plot":
|
425 |
+
generate_scatter_plot({"x": command["x"], "y": command["y"]})
|
426 |
+
elif command["action"] == "histogram":
|
427 |
+
generate_histogram({"x": command["x"]})
|
428 |
+
st.session_state.chat_history.append({"role": "user", "content": user_input})
|
429 |
+
st.session_state.chat_history.append({"role": "assistant", "content": "Command executed."})
|
430 |
+
else:
|
431 |
+
st.session_state.chat_history.append({"role": "user", "content": user_input})
|
432 |
+
response = get_chatbot_response(user_input, app_mode, st.session_state.vector_store, model)
|
433 |
+
st.session_state.chat_history.append({"role": "assistant", "content": response})
|
434 |
+
st.rerun()
|
435 |
+
st.markdown('</div>', unsafe_allow_html=True)
|
436 |
|
437 |
# Footer
|
438 |
st.markdown('<div class="footer">Built with Streamlit & Groq</div>', unsafe_allow_html=True)
|