Update app.py
app.py
CHANGED
@@ -8,6 +8,7 @@ import warnings
 import google.generativeai as genai
 import os
 from typing import List, Dict, Any, Tuple, Optional
+import re
 
 # --- Configuration & Constants ---
 warnings.filterwarnings('ignore')
@@ -43,8 +44,8 @@ class DataExplorerApp:
 status_output = gr.Markdown("Status: Awaiting data...")
 api_key_input = gr.Textbox(label="🔑 Gemini API Key", type="password", placeholder="Enter key to enable AI...")
 suggestion_btn = gr.Button("Get Smart Suggestions", variant="secondary", interactive=False)
-rows_stat, cols_stat = gr.Textbox("0", interactive=False, show_label=False
-quality_stat, time_cols_stat = gr.Textbox("0%", interactive=False, show_label=False
+rows_stat, cols_stat = gr.Textbox("0", interactive=False, show_label=False), gr.Textbox("0", interactive=False, show_label=False)
+quality_stat, time_cols_stat = gr.Textbox("0%", interactive=False, show_label=False), gr.Textbox("0", interactive=False, show_label=False)
 suggestion_buttons = [gr.Button(visible=False) for _ in range(5)]
 plot_type_dd = gr.Dropdown(['histogram', 'bar', 'scatter', 'box'], label="Plot Type", value='histogram')
 x_col_dd = gr.Dropdown([], label="X-Axis / Column", interactive=False)
@@ -67,10 +68,8 @@ class DataExplorerApp:
 with cockpit_page:
 gr.Markdown("## 🚀 Data Cockpit: At-a-Glance Overview")
 with gr.Row():
-with gr.Column(elem_classes="stat-card"): gr.Markdown("<div class='stat-card-title'>Rows</div>"); rows_stat
-with gr.Column(elem_classes="stat-card"): gr.Markdown("<div class='stat-card-title'>Columns</div>"); cols_stat
-with gr.Column(elem_classes="stat-card"): gr.Markdown("<div class='stat-card-title'>Data Quality</div>"); quality_stat
-with gr.Column(elem_classes="stat-card"): gr.Markdown("<div class='stat-card-title'>Date/Time Cols</div>"); time_cols_stat
+for title, stat_comp in [("Rows", rows_stat), ("Columns", cols_stat), ("Data Quality", quality_stat), ("Date/Time Cols", time_cols_stat)]:
+with gr.Column(elem_classes="stat-card"): gr.Markdown(f"<div class='stat-card-title'>{title}</div>"); stat_comp
 with gr.Accordion(label="✨ AI Smart Suggestions", open=True): [btn for btn in suggestion_buttons]
 with deep_dive_page:
 gr.Markdown("## 🔍 Deep Dive: Manual Dashboard Builder"); gr.Markdown("Construct visualizations to investigate specific relationships.")
@@ -84,15 +83,12 @@ class DataExplorerApp:
 
 # Event Handlers Registration
 pages, nav_buttons = [welcome_page, cockpit_page, deep_dive_page, copilot_page], [cockpit_btn, deep_dive_btn, copilot_btn]
-
 for i, btn in enumerate(nav_buttons):
 btn.click(lambda id=btn.elem_id: self._switch_page(id, pages), outputs=pages).then(
 lambda i=i: [gr.update(elem_classes="selected" if j==i else "") for j in range(len(nav_buttons))], outputs=nav_buttons)
 
 file_input.upload(self.load_and_process_file, inputs=[file_input], outputs=[
-state_var, status_output, *pages, rows_stat, cols_stat, quality_stat, time_cols_stat,
-x_col_dd, y_col_dd, add_plot_btn]).then(lambda: self._switch_page("cockpit", pages), outputs=pages).then(
-lambda: [gr.update(elem_classes="selected"), gr.update(elem_classes=""), gr.update(elem_classes="")], outputs=nav_buttons)
+state_var, status_output, *pages, rows_stat, cols_stat, quality_stat, time_cols_stat, x_col_dd, y_col_dd, add_plot_btn])
 
 api_key_input.change(lambda x: gr.update(interactive=bool(x)), inputs=[api_key_input], outputs=[suggestion_btn])
 chat_input.change(lambda x: gr.update(interactive=bool(x.strip())), inputs=[chat_input], outputs=[chat_submit_btn])
@@ -103,9 +99,7 @@ class DataExplorerApp:
 
 suggestion_btn.click(self.get_ai_suggestions, inputs=[state_var, api_key_input], outputs=suggestion_buttons)
 for btn in suggestion_buttons:
-btn.click(self.handle_suggestion_click, inputs=[btn], outputs=[*pages, chat_input])
-lambda: self._switch_page("co-pilot", pages), outputs=pages).then(
-lambda: (gr.update(elem_classes=""), gr.update(elem_classes=""), gr.update(elem_classes="selected")), outputs=nav_buttons)
+btn.click(self.handle_suggestion_click, inputs=[btn], outputs=[*pages, chat_input])
 
 chat_submit_btn.click(self.respond_to_chat, [state_var, api_key_input, chat_input, chatbot], [chatbot, copilot_explanation, copilot_code, copilot_plot, copilot_table]).then(lambda: "", outputs=[chat_input])
 chat_input.submit(self.respond_to_chat, [state_var, api_key_input, chat_input, chatbot], [chatbot, copilot_explanation, copilot_code, copilot_plot, copilot_table]).then(lambda: "", outputs=[chat_input])
@@ -113,12 +107,12 @@ class DataExplorerApp:
 
 def launch(self): self.demo.launch(debug=True)
 
+# --- Backend Logic Methods ---
 def _switch_page(self, page_id: str, all_pages: List) -> List[gr.update]:
 visibility = {"welcome":0, "cockpit":1, "deep_dive":2, "co-pilot":3}
 return [gr.update(visible=i == visibility.get(page_id, 0)) for i in range(len(all_pages))]
 
-def _update_plot_controls(self, plot_type: str) -> gr.update:
-return gr.update(visible=plot_type in ['scatter', 'box'])
+def _update_plot_controls(self, plot_type: str) -> gr.update: return gr.update(visible=plot_type in ['scatter', 'box'])
 
 def load_and_process_file(self, file_obj: Any) -> Tuple[Any, ...]:
 try:
@@ -127,16 +121,14 @@ class DataExplorerApp:
 state = {'df': df, 'metadata': metadata, 'dashboard_plots': []}
 rows, cols, quality = metadata['shape'][0], metadata['shape'][1], metadata['data_quality']
 page_updates = self._switch_page("cockpit", [0,1,2,3])
-return (state, f"✅ **{os.path.basename(file_obj.name)}** loaded.", *page_updates,
-f"{rows:,}", f"{cols}", f"{quality}%", f"{len(metadata['datetime_cols'])}",
+return (state, f"✅ **{os.path.basename(file_obj.name)}** loaded.", *page_updates, f"{rows:,}", f"{cols}", f"{quality}%", f"{len(metadata['datetime_cols'])}",
 gr.update(choices=metadata['columns'], interactive=True), gr.update(choices=metadata['columns'], interactive=True), gr.update(interactive=True))
 except Exception as e:
 gr.Error(f"File Load Error: {e}"); page_updates = self._switch_page("welcome", [0,1,2,3]);
 return {}, f"❌ Error: {e}", *page_updates, "0", "0", "0%", "0", gr.update(choices=[], interactive=False), gr.update(choices=[], interactive=False), gr.update(interactive=False)
 
 def _extract_dataset_metadata(self, df: pd.DataFrame) -> Dict[str, Any]:
-rows, cols = df.shape
-quality = round((df.notna().sum().sum() / (rows * cols)) * 100, 1) if rows * cols > 0 else 0
+rows, cols, quality = df.shape[0], df.shape[1], round((df.notna().sum().sum() / (df.size)) * 100, 1) if df.size > 0 else 0
 return {'shape': (rows, cols), 'columns': df.columns.tolist(), 'numeric_cols': df.select_dtypes(include=np.number).columns.tolist(),
 'categorical_cols': df.select_dtypes(include=['object', 'category']).columns.tolist(), 'datetime_cols': df.select_dtypes(include=['datetime64', 'datetime64[ns]']).columns.tolist(),
 'dtypes_head': df.head(3).to_string(), 'data_quality': quality}
@@ -168,9 +160,7 @@ class DataExplorerApp:
 def get_ai_suggestions(self, state: Dict, api_key: str) -> List[gr.update]:
 if not api_key: gr.Warning("API Key is required."); return [gr.update(visible=False)]*5
 if not state: gr.Warning("Please load data first."); return [gr.update(visible=False)]*5
-
-metadata = state.get('metadata', {})
-columns = metadata.get('columns', [])
+metadata, columns = state.get('metadata', {}), state.get('metadata', {}).get('columns', [])
 prompt = f"From columns {columns}, generate 4 impactful analytical questions. Return ONLY a JSON list of strings."
 try:
 genai.configure(api_key=api_key); suggestions = json.loads(genai.GenerativeModel('gemini-1.5-flash').generate_content(prompt).text)
@@ -180,24 +170,37 @@ class DataExplorerApp:
 def handle_suggestion_click(self, question: str) -> Tuple[gr.update, ...]:
 return *self._switch_page("co-pilot", [0,1,2,3]), question
 
+def _sanitize_and_parse_json(self, raw_text: str) -> Dict:
+"""Cleans and parses a JSON string from an LLM response."""
+# Remove markdown code blocks
+clean_text = re.sub(r'```json\n?|```', '', raw_text).strip()
+# Escape single backslashes that are not already escaped
+clean_text = re.sub(r'(?<!\\)\\(?!["\\/bfnrtu])', r'\\\\', clean_text)
+return json.loads(clean_text)
+
 def respond_to_chat(self, state: Dict, api_key: str, user_message: str, history: List) -> Any:
 if not user_message.strip(): return history, *[gr.update()]*4
 if not api_key or not state:
-
+history.append((user_message, "I need a Gemini API key and a dataset to work.")); return history, *[gr.update(visible=False)]*4
 
 history.append((user_message, "Thinking... 🤔")); yield history, *[gr.update(visible=False)]*4
 
-
-
-
-
-
-
-
-
+metadata, dtypes_head = state.get('metadata', {}), state.get('metadata', {}).get('dtypes_head', 'No metadata available.')
+prompt = f"""You are 'Chief Data Scientist', an expert AI analyst. Your goal is to answer a user's question about a pandas DataFrame (`df`) by writing and executing Python code.
+**Instructions:**
+1. **Analyze:** Understand the user's intent. Infer the best plot type.
+2. **Plan:** Briefly explain your plan.
+3. **Code:** Write Python code. Use `fig` for plots (`template='plotly_dark'`) and `result_df` for tables.
+4. **Insight:** Provide a one-sentence business insight.
+5. **Respond ONLY with a single JSON object with keys: "plan", "code", "insight".**
+**Metadata:** {dtypes_head}
+**User Question:** "{user_message}"
 """
 try:
-genai.configure(api_key=api_key)
+genai.configure(api_key=api_key)
+# CRITICAL FIX: Use the new sanitizer function
+response_json = self._sanitize_and_parse_json(genai.GenerativeModel('gemini-1.5-flash').generate_content(prompt).text)
+
 plan, code, insight = response_json.get("plan"), response_json.get("code"), response_json.get("insight")
 stdout, fig, df_result, error = self._safe_exec(code, {'df': state['df'], 'px': px, 'pd': pd})
 
@@ -209,7 +212,7 @@ class DataExplorerApp:
 yield (history, gr.update(visible=bool(explanation), value=explanation), gr.update(visible=bool(code), value=code),
 gr.update(visible=bool(fig), value=fig), gr.update(visible=bool(df_result is not None), value=df_result))
 except Exception as e:
-history[-1] = (user_message, f"I encountered an error processing the AI response. Please rephrase your question
+history[-1] = (user_message, f"I encountered an error processing the AI response. Please rephrase your question.\n\n**Details:** `{str(e)}`")
 yield history, *[gr.update(visible=False)]*4
 
 def _safe_exec(self, code_string: str, local_vars: Dict) -> Tuple[Any, ...]:
@@ -224,8 +227,7 @@ if __name__ == "__main__":
 try:
 from PIL import Image
 Image.new('RGB', (1, 1)).save('bot.png')
-except ImportError:
-print("Pillow not installed, cannot create dummy bot.png. Please create it manually.")
+except ImportError: print("Pillow not installed, cannot create dummy bot.png.")
 
 app = DataExplorerApp()
 app.launch()
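The central fix in this commit is `_sanitize_and_parse_json`: Gemini often wraps its JSON reply in a markdown fence and leaves single backslashes (for example Windows paths) unescaped, either of which makes a bare `json.loads` raise. A standalone sketch of the same two-step cleanup, run against a hypothetical model reply, shows the effect (the real helper lives on `DataExplorerApp`, as added above):

import json
import re

def sanitize_and_parse_json(raw_text: str) -> dict:
    """Free-function mirror of the helper added in this commit (illustrative only)."""
    # Strip the markdown code fence Gemini often wraps around its JSON.
    clean_text = re.sub(r'```json\n?|```', '', raw_text).strip()
    # Double any lone backslash that does not start a valid JSON escape sequence.
    clean_text = re.sub(r'(?<!\\)\\(?!["\\/bfnrtu])', r'\\\\', clean_text)
    return json.loads(clean_text)

# Hypothetical model reply: fenced, with an unescaped Windows-style path.
raw = '```json\n{"plan": "Preview C:\\data\\sales.csv", "code": "result_df = df.head()", "insight": "..."}\n```'
print(sanitize_and_parse_json(raw)["plan"])  # Preview C:\data\sales.csv

The second regex only doubles backslashes that are not already the start of a valid JSON escape, so correctly escaped model output passes through unchanged.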
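The new prompt also pins down a contract for the generated code: plots go in `fig` (with `template='plotly_dark'`) and tables in `result_df`, and `respond_to_chat` unpacks `stdout, fig, df_result, error` from `self._safe_exec(code, {'df': state['df'], 'px': px, 'pd': pd})`. `_safe_exec` itself is untouched by this commit and its body is not shown; a minimal sketch of an executor that satisfies that contract, assuming it simply execs the generated code against the supplied locals and captures stdout and exceptions, might look like:

import contextlib
import io
from typing import Any, Dict, Optional, Tuple

def safe_exec(code_string: str, local_vars: Dict[str, Any]) -> Tuple[str, Any, Any, Optional[str]]:
    """Hypothetical stand-in for DataExplorerApp._safe_exec, not the app's actual code."""
    buffer = io.StringIO()
    try:
        # Run the model-generated code with stdout captured; df/px/pd arrive via local_vars.
        with contextlib.redirect_stdout(buffer):
            exec(code_string, {}, local_vars)
        # Per the prompt contract, a plot lands in `fig` and a table in `result_df`.
        return buffer.getvalue(), local_vars.get('fig'), local_vars.get('result_df'), None
    except Exception as exc:
        return buffer.getvalue(), None, None, str(exc)

Returning the exception text instead of raising keeps arbitrary model-generated code from crashing the Gradio event handler.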