Update app.py
app.py (CHANGED)
@@ -29,59 +29,55 @@ client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 # Initialize HuggingFace embeddings for FAISS
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
-# Custom CSS
+# Custom CSS with Silver, Blue, and Gold Theme + Responsiveness
 st.markdown("""
 <style>
 :root {
-    --
-    --
-    --
-    --
-    --light-grey: #F3F4F6;
-    --white: #FFFFFF;
-    --border-grey: #E5E7EB;
+    --silver: #D8D8D8;
+    --blue: #5C89BC;
+    --gold: #A87E01;
+    --text-color: #333333;
 }
 .stApp {
-    background-color: var(--
+    background-color: var(--silver);
     font-family: 'Inter', sans-serif;
     max-width: 900px;
     margin: 0 auto;
+    padding: 10px;
 }
 .header {
-    background-color: var(--
-
+    background-color: var(--blue);
+    color: white;
     padding: 15px;
-    border-radius:
-    box-shadow: 0 2px 4px rgba(0,0,0,0.
+    border-radius: 5px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     text-align: center;
 }
 .header-title {
-    color: var(--dark-blue);
     font-size: 1.5rem;
     font-weight: 700;
     margin: 0;
 }
 .header-subtitle {
-    color: var(--medium-grey);
     font-size: 0.9rem;
     margin-top: 5px;
 }
 .sidebar .sidebar-content {
-    background-color:
-    border-radius:
-    box-shadow: 0 4px
+    background-color: white;
+    border-radius: 5px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     padding: 15px;
 }
 .chat-container {
-    background-color:
-    border-radius:
-    box-shadow: 0 4px
+    background-color: white;
+    border-radius: 5px;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
     padding: 15px;
     margin-top: 20px;
 }
 .user-message {
-    background-color: var(--
-    color:
+    background-color: var(--blue);
+    color: white;
     border-radius: 18px 18px 4px 18px;
     padding: 12px 16px;
     margin-left: auto;
@@ -89,8 +85,8 @@ st.markdown("""
     margin-bottom: 10px;
 }
 .bot-message {
-    background-color:
-    color: var(--
+    background-color: #F0F0F0;
+    color: var(--text-color);
     border-radius: 18px 18px 18px 4px;
     padding: 12px 16px;
     margin-right: auto;
@@ -100,24 +96,57 @@ st.markdown("""
 .footer {
     text-align: center;
     margin-top: 20px;
-    color: var(--
+    color: var(--text-color);
     font-size: 0.8rem;
 }
 .tech-badge {
     display: inline-block;
-    background-color:
-    color: var(--
+    background-color: #E6ECEF;
+    color: var(--blue);
     padding: 4px 8px;
     border-radius: 12px;
     font-size: 0.7rem;
     margin: 0 4px;
 }
+h2 {
+    color: var(--blue);
+    border-bottom: 2px solid var(--gold);
+    padding-bottom: 5px;
+}
+.stButton > button {
+    background-color: var(--gold);
+    color: white;
+    border-radius: 5px;
+    padding: 8px 16px;
+    border: none;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+}
+.stButton > button:hover {
+    background-color: #8C6B01;
+}
+@media (max-width: 768px) {
+    .header-title {
+        font-size: 1.2rem;
+    }
+    .header-subtitle {
+        font-size: 0.8rem;
+    }
+    .chat-container, .sidebar .sidebar-content {
+        padding: 10px;
+    }
+    .stApp {
+        padding: 5px;
+    }
+    h2 {
+        font-size: 1.2rem;
+    }
+}
 </style>
 """, unsafe_allow_html=True)
 
-# Helper Functions
+# Helper Functions (unchanged)
 def enhance_section_title(title):
-    st.markdown(f"<h2 style='border-bottom: 2px solid var(--
+    st.markdown(f"<h2 style='border-bottom: 2px solid var(--gold); padding-bottom: 5px; color: var(--blue);'>{title}</h2>", unsafe_allow_html=True)
 
 def update_cleaned_data(df):
     st.session_state.cleaned_data = df
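Note: the new global `h2` rule applies the same blue color, gold underline, and bottom padding that `enhance_section_title` still sets inline, so the inline style is now largely redundant. A minimal sketch of how the helper could lean on the stylesheet instead (a possible follow-up, not part of this commit):

```python
def enhance_section_title(title):
    # Color, border, and padding now come from the global h2 rule in the
    # injected <style> block, so only the element itself is needed here.
    st.markdown(f"<h2>{title}</h2>", unsafe_allow_html=True)
```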
@@ -129,7 +158,6 @@ def update_cleaned_data(df):
     st.rerun()
 
 def convert_df_to_text(df):
-    """Convert DataFrame to text for vector store and context"""
     text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
    text += f"Missing Values: {df.isna().sum().sum()}\n"
     text += "Columns:\n"
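Note: the per-column part of `convert_df_to_text` (old lines 136-142) falls outside this hunk. A hedged sketch of what such a column summary loop typically looks like; the exact fields the app emits are not visible here:

```python
import pandas as pd

def convert_df_to_text(df: pd.DataFrame) -> str:
    # Visible portion of the function, plus a hypothetical per-column loop.
    text = f"Dataset Summary: {df.shape[0]} rows, {df.shape[1]} columns\n"
    text += f"Missing Values: {df.isna().sum().sum()}\n"
    text += "Columns:\n"
    for col in df.columns:
        text += f"- {col}: dtype={df[col].dtype}, missing={df[col].isna().sum()}\n"
    return text
```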
@@ -143,11 +171,9 @@ def convert_df_to_text(df):
     return text
 
 def create_vector_store(df_text):
-    """Create a FAISS vector store from dataset text"""
     with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
         temp_file.write(df_text)
         temp_path = temp_file.name
-
     loader = TextLoader(temp_path)
     documents = loader.load()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
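Note: the tail of `create_vector_store` (embedding the chunks and removing the temp file) sits outside the hunk. A minimal sketch of the presumable full flow, assuming it mirrors `update_vector_store_with_plot` below and relies on app.py's module-level imports:

```python
def create_vector_store(df_text):
    # Write the dataset text to a temp file so TextLoader can ingest it.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
        temp_file.write(df_text)
        temp_path = temp_file.name
    loader = TextLoader(temp_path)
    documents = loader.load()
    # Chunk the text so similarity search returns focused snippets.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    texts = text_splitter.split_documents(documents)
    vector_store = FAISS.from_documents(texts, embeddings)  # embeddings defined at line 30
    os.unlink(temp_path)  # clean up the temp file once indexed
    return vector_store
```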
@@ -157,36 +183,29 @@ def create_vector_store(df_text):
     return vector_store
 
 def update_vector_store_with_plot(plot_text, existing_vector_store):
-    """Update the FAISS vector store with plot-derived text"""
     with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as temp_file:
         temp_file.write(plot_text)
         temp_path = temp_file.name
-
     loader = TextLoader(temp_path)
     documents = loader.load()
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
     texts = text_splitter.split_documents(documents)
-
     if existing_vector_store:
         existing_vector_store.add_documents(texts)
     else:
         existing_vector_store = FAISS.from_documents(texts, embeddings)
-
     os.unlink(temp_path)
     return existing_vector_store
 
 def extract_plot_data(plot_info, df):
-    """Extract numerical data from the last generated plot and convert to text"""
     plot_type = plot_info["type"]
     x_col = plot_info["x"]
     y_col = plot_info["y"] if "y" in plot_info else None
     data = pd.read_json(plot_info["data"])
-
     plot_text = f"Plot Type: {plot_type}\n"
     plot_text += f"X-Axis: {x_col}\n"
     if y_col:
         plot_text += f"Y-Axis: {y_col}\n"
-
     if plot_type == "Scatter Plot" and y_col:
         correlation = data[x_col].corr(data[y_col])
         slope, intercept, r_value, p_value, std_err = stats.linregress(data[x_col].dropna(), data[y_col].dropna())
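Note: only the first two statistics of the scatter-plot branch are visible; how they are written into `plot_text` is outside the hunk. A hedged, standalone sketch of that kind of summary (`describe_scatter` is a hypothetical helper; it also drops NaN rows jointly, whereas the visible call drops each column independently, which can misalign the two series):

```python
import pandas as pd
from scipy import stats

def describe_scatter(data: pd.DataFrame, x_col: str, y_col: str) -> str:
    """Summarize a scatter relationship as text for the RAG vector store."""
    clean = data[[x_col, y_col]].dropna()  # drop rows jointly so x and y stay aligned
    correlation = clean[x_col].corr(clean[y_col])
    slope, intercept, r_value, p_value, std_err = stats.linregress(clean[x_col], clean[y_col])
    return (
        f"Correlation ({x_col} vs {y_col}): {correlation:.2f}\n"
        f"Linear fit: slope={slope:.3f}, intercept={intercept:.3f}, "
        f"R^2={r_value**2:.3f}, p-value={p_value:.3g}, std_err={std_err:.3f}\n"
    )
```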
@@ -212,13 +231,11 @@ def extract_plot_data(plot_info, df):
         plot_text += "Correlation Matrix:\n"
         for col1 in corr.columns:
             for col2 in corr.index:
-                if col1 < col2:
+                if col1 < col2:
                     plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
-
     return plot_text
 
 def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
-    """Get response from Groq with vector store context including plot data"""
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
         f"The user is on the '{app_mode}' page:\n"
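Note: the `col1 < col2` guard keeps one entry per unordered pair by comparing column names lexicographically. An equivalent way to walk unique pairs of a correlation matrix (a sketch of an alternative, not what the commit does; `pairwise_correlation_text` is hypothetical):

```python
from itertools import combinations
import pandas as pd

def pairwise_correlation_text(corr: pd.DataFrame) -> str:
    """One line per unordered column pair of a correlation matrix."""
    return "".join(
        f"{col1} vs {col2}: {corr.loc[col1, col2]:.2f}\n"
        for col1, col2 in combinations(corr.columns, 2)
    )
```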
@@ -227,7 +244,6 @@ def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
         "- **EDA**: Visualize data (e.g., scatter plots, histograms) and analyze plots.\n"
         "When analyzing plots, provide detailed insights based on numerical data extracted from them."
     )
-
     context = ""
     if vector_store:
         docs = vector_store.similarity_search(user_input, k=3)
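Note: the lines that join the retrieved documents into `context` (old 234-235) fall between the hunks. A minimal sketch of the usual LangChain retrieval pattern, assuming the app concatenates each document's `page_content`:

```python
# Inside get_chatbot_response, continuing from the visible similarity_search call.
docs = vector_store.similarity_search(user_input, k=3)
# Join the retrieved chunks into one context string for the system prompt.
context = "\n\n".join(doc.page_content for doc in docs)
```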
@@ -236,7 +252,6 @@ def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
         system_prompt += f"Use this dataset and plot context to augment your response:\n{context}"
     else:
         system_prompt += "No dataset or plot data is loaded. Assist based on app functionality."
-
     try:
         response = client.chat.completions.create(
             model=model,
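Note: the rest of the Groq call is cut off by the hunk boundary. A hedged sketch of a typical chat-completion call with the messages this function builds; everything other than `model=model` is an assumption, not read from the diff:

```python
# Hypothetical tail of get_chatbot_response(...)
try:
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_input},
        ],
        temperature=0.7,  # assumed value, not visible in the diff
        max_tokens=1024,  # assumed value, not visible in the diff
    )
    return response.choices[0].message.content
except Exception as e:
    return f"Error getting response: {e}"
```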
@@ -410,9 +425,11 @@ def main():
     if 'raw_data' not in st.session_state:
         st.warning("Please upload data first in the Data Upload section.")
         st.stop()
-    if 'cleaned_data'
+    if 'cleaned_data' in st.session_state:
+        df = st.session_state.cleaned_data.copy()
+    else:
         st.session_state.cleaned_data = st.session_state.raw_data.copy()
-
+        df = st.session_state.cleaned_data.copy()
 
     enhance_section_title("📊 Data Health Dashboard")
     with st.expander("Explore Data Health Metrics", expanded=True):
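Note: the added branch guarantees `df` is defined whether or not a cleaned copy already exists; both arms end by copying `st.session_state.cleaned_data`. The same guarantee can be written without the duplicated copy (a sketch of an equivalent refactor, not what the commit does):

```python
# Seed cleaned_data from raw_data on first visit, then always work on a copy.
if 'cleaned_data' not in st.session_state:
    st.session_state.cleaned_data = st.session_state.raw_data.copy()
df = st.session_state.cleaned_data.copy()
```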
@@ -579,7 +596,6 @@ def main():
                 "y": y_axis,
                 "data": df[[x_axis, y_axis]].to_json() if y_axis else df[[x_axis]].to_json()
             }
-            # Extract numerical data and update vector store
             plot_text = extract_plot_data(st.session_state.last_plot, df)
             st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
             with st.expander("Extracted Plot Data"):
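Note: `last_plot["data"]` stores the plotted columns as a JSON string via `to_json()`, and `extract_plot_data` reads it back with `pd.read_json(plot_info["data"])`. On pandas 2.1+, passing a literal JSON string to `read_json` emits a FutureWarning; wrapping it in `StringIO` keeps the round-trip warning-free (a sketch under that assumption; `df`, `x_axis`, and `y_axis` come from the surrounding EDA code):

```python
from io import StringIO
import pandas as pd

payload = df[[x_axis, y_axis]].to_json() if y_axis else df[[x_axis]].to_json()
# read_json on a literal string is deprecated in newer pandas; wrap it in StringIO.
data = pd.read_json(StringIO(payload))
```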