Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,486 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import plotly.express as px
|
5 |
+
import plotly.graph_objects as go
|
6 |
+
from plotly.subplots import make_subplots
|
7 |
+
import io
|
8 |
+
from scipy import stats
|
9 |
+
import warnings
|
10 |
+
import google.generativeai as genai
|
11 |
+
import os
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
import logging
|
14 |
+
import json
|
15 |
+
from contextlib import redirect_stdout
|
16 |
+
|
17 |
+
# --- Configuration ---
# Silence noisy library warnings (pandas/plotly deprecations) so they don't
# clutter the Space logs; app-level events still go through `logging` below.
warnings.filterwarnings('ignore')
# Timestamped INFO-level logging used for file-load events and for errors
# raised while executing AI-generated code.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
20 |
+
|
21 |
+
# --- Helper Functions ---
|
22 |
+
|
23 |
+
def safe_exec(code_string: str, local_vars: dict) -> tuple:
    """Safely execute a string of Python code and capture its output.

    SECURITY NOTE: ``exec`` runs the snippet with full interpreter
    privileges — callers must treat the code as untrusted (here it comes
    from an LLM) and never feed it user-supplied code paths elsewhere.

    Args:
        code_string: Python source to execute (may assign a plotly figure
            to a variable named ``fig``).
        local_vars: Names to expose to the snippet (e.g. ``df``, ``px``).
            The dict is copied, so the caller's mapping is not mutated.

    Returns:
        Tuple ``(stdout_text, fig_or_None, error_message_or_None)``;
        on failure the first two elements are ``None``.
    """
    output_buffer = io.StringIO()
    # BUG FIX: the previous exec(code, globals(), local_vars) form uses
    # class-body scoping — a function *defined* by the snippet could not see
    # names like `df` (NameError). Executing in one merged namespace restores
    # normal module-level scoping, and also stops leaking this app's own
    # globals into the untrusted snippet.
    namespace = dict(local_vars)
    try:
        with redirect_stdout(output_buffer):
            exec(code_string, namespace)

        stdout_output = output_buffer.getvalue()
        fig = namespace.get('fig', None)
        return stdout_output, fig, None
    except Exception as e:
        error_message = f"Execution Error: {str(e)}"
        logging.error(f"Error executing AI-generated code: {error_message}")
        return None, None, error_message
|
37 |
+
|
38 |
+
# --- Core Data Processing ---
|
39 |
+
|
40 |
+
def load_and_process_file(file_obj, state_dict):
    """Loads a CSV file and performs initial processing, updating the global state.

    Wired to ``file_input.upload`` with nine registered outputs, so EVERY
    return path must yield exactly nine values:
    (state, status_md, uni_col_select, bi_x_select, bi_y_select,
     ts_value_col, ts_time_col, ts_tab, chat_tab).

    Args:
        file_obj: Gradio file wrapper exposing a ``.name`` path, or None.
        state_dict: Current app state dict; replaced on a successful load.
    """
    if file_obj is None:
        # BUG FIX: this branch previously returned only 5 values for the 9
        # registered outputs, so Gradio raised an error instead of showing
        # the message. Pad with no-op updates and keep the AI tabs hidden.
        return (
            state_dict,
            "Please upload a file.",
            gr.update(), gr.update(), gr.update(), gr.update(), gr.update(),
            gr.update(visible=False),
            gr.update(visible=False),
        )

    try:
        df = pd.read_csv(file_obj.name)

        # Attempt to convert object columns to datetime; columns that fail
        # to parse are left untouched (strings).
        for col in df.select_dtypes(include=['object']).columns:
            try:
                df[col] = pd.to_datetime(df[col], errors='raise')
                logging.info(f"Successfully converted column '{col}' to datetime.")
            except (ValueError, TypeError):
                continue

        metadata = extract_dataset_metadata(df)

        state_dict = {
            'df': df,
            'metadata': metadata,
            'filename': os.path.basename(file_obj.name)
        }

        # Shared kwargs for the three "any column" dropdowns.
        update_args = {
            'choices': metadata['columns'],
            'value': None,
            'interactive': True
        }

        # The Time Series tab is only useful when a datetime column exists.
        time_series_visible = len(metadata['datetime_cols']) > 0

        return (
            state_dict,
            f"β Loaded `{state_dict['filename']}` ({metadata['shape'][0]} rows, {metadata['shape'][1]} cols)",
            gr.update(**update_args), gr.update(**update_args), gr.update(**update_args),
            gr.update(choices=metadata['numeric_cols'], value=None, interactive=True),
            gr.update(choices=metadata['datetime_cols'], value=None, interactive=True),
            gr.update(visible=time_series_visible),  # Show/hide Time Series tab
            gr.update(visible=True)  # Show Chatbot tab
        )
    except Exception as e:
        logging.error(f"Error loading file: {e}")
        return state_dict, f"β Error: {e}", gr.update(), gr.update(), gr.update(), gr.update(), gr.update(), gr.update(visible=False), gr.update(visible=False)
|
86 |
+
|
87 |
+
def extract_dataset_metadata(df: pd.DataFrame) -> dict:
    """Extracts comprehensive metadata from a DataFrame.

    Args:
        df: The loaded dataset.

    Returns:
        Dict with keys: 'shape' (rows, cols), 'columns', 'numeric_cols',
        'categorical_cols', 'datetime_cols', 'dtypes' (string dump),
        'missing_data' (per-column null counts), 'data_quality'
        (percentage of non-missing cells, one decimal), and 'head'
        (first five rows as text).
    """
    rows, cols = df.shape
    columns = df.columns.tolist()

    numeric_cols = df.select_dtypes(include=np.number).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    # GENERALIZED: 'datetime64'/'datetime64[ns]' alone misses tz-aware
    # columns (dtype 'datetime64[ns, tz]'); the 'datetimetz' selector
    # catches those too, so tz-aware columns reach the Time Series tab.
    datetime_cols = df.select_dtypes(include=['datetime', 'datetimetz']).columns.tolist()

    missing_data = df.isnull().sum()
    # Guard against division by zero for an empty frame.
    data_quality = round((df.notna().sum().sum() / (rows * cols)) * 100, 1) if rows * cols > 0 else 0

    return {
        'shape': (rows, cols),
        'columns': columns,
        'numeric_cols': numeric_cols,
        'categorical_cols': categorical_cols,
        'datetime_cols': datetime_cols,
        'dtypes': df.dtypes.to_string(),
        'missing_data': missing_data.to_dict(),
        'data_quality': data_quality,
        'head': df.head().to_string()
    }
|
110 |
+
|
111 |
+
# --- Tab 1: AI Overview ---
|
112 |
+
|
113 |
+
def analyze_dataset_overview(state_dict, api_key: str):
    """Generates an AI-powered narrative overview of the dataset.

    Sends only metadata (plus ``df.head()``) to Gemini — never the full
    dataset — and degrades to a static fallback summary if the API call
    fails for any reason.

    Args:
        state_dict: App state with 'metadata' and 'filename'.
        api_key: Gemini API key entered by the user.

    Returns:
        Tuple ``(story_markdown, basic_info_markdown, data_quality_pct)``;
        the last value feeds the numeric quality-score widget.
    """
    # Guard clauses: both checks short-circuit before any API usage.
    if not state_dict:
        return "β Please upload a dataset first.", "", 0
    if not api_key:
        return "β Please enter your Gemini API key.", "", 0

    metadata = state_dict['metadata']

    # Create prompt for Gemini
    prompt = f"""
    You are an expert data analyst and storyteller. Your task is to provide a high-level, engaging overview of a dataset based on its metadata.

    **Dataset Metadata:**
    - **Shape:** {metadata['shape'][0]} rows, {metadata['shape'][1]} columns
    - **Column Names:** {', '.join(metadata['columns'])}
    - **Numeric Columns:** {', '.join(metadata['numeric_cols'])}
    - **Categorical Columns:** {', '.join(metadata['categorical_cols'])}
    - **Datetime Columns:** {', '.join(metadata['datetime_cols'])}
    - **Data Quality (Non-missing values):** {metadata['data_quality']}%
    - **First 5 rows:**
    {metadata['head']}

    **Your Task:**
    Based on the metadata, generate a report in Markdown format. Use emojis to make it visually appealing. The report should have the following sections:

    # π AI-Powered Dataset Overview

    ## π€ What is this dataset likely about?
    (Predict the domain and purpose of the dataset, e.g., "This appears to be customer transaction data for an e-commerce platform.")

    ## π‘ Potential Key Questions to Explore
    - (Suggest 3-4 interesting business or research questions the data could answer.)
    - (Example: "Which products are most frequently purchased together?")

    ## π Potential Analyses & Visualizations
    - (List 3-4 types of analyses that would be valuable.)
    - (Example: "Time series analysis of sales to identify seasonality.")

    ## β οΈ Data Quality & Potential Issues
    - (Briefly comment on the data quality score and mention if the presence of datetime columns is a good sign for certain analyses.)
    """

    try:
        genai.configure(api_key=api_key)
        # NOTE(review): model name is pinned; update if the Gemini API
        # retires 'gemini-1.5-flash'.
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)
        story = response.text
    except Exception as e:
        # Any failure (bad key, network, quota) falls back to a purely
        # local summary built from the already-computed metadata.
        story = f"## β οΈ AI Generation Failed\n**Error:** {str(e)}\n\nPlease check your API key and network connection. A fallback analysis is provided below.\n\n" \
                f"### Fallback Analysis\nThis dataset contains **{metadata['shape'][0]}** records and **{metadata['shape'][1]}** features. " \
                f"It includes **{len(metadata['numeric_cols'])}** numeric, **{len(metadata['categorical_cols'])}** categorical, " \
                f"and **{len(metadata['datetime_cols'])}** time-based columns. The overall data quality is **{metadata['data_quality']}%**, " \
                f"which is a good starting point for analysis."

    # Basic Info Summary
    basic_info = f"""
    π **File:** `{state_dict.get('filename', 'N/A')}`
    π **Size:** {metadata['shape'][0]:,} rows Γ {metadata['shape'][1]} columns
    π’ **Features:**
    β’ **Numeric:** {len(metadata['numeric_cols'])}
    β’ **Categorical:** {len(metadata['categorical_cols'])}
    β’ **DateTime:** {len(metadata['datetime_cols'])}
    π― **Data Quality:** {metadata['data_quality']}%
    """

    return story, basic_info, metadata['data_quality']
|
180 |
+
|
181 |
+
# --- Tab 2: Univariate Analysis ---
|
182 |
+
|
183 |
+
def generate_univariate_plot(column_name, state_dict):
    """Generates plots for a single selected variable.

    Numeric columns get a histogram + box plot, categoricals a top-20 bar
    chart, datetimes a monthly record-count line.

    Args:
        column_name: Column chosen in the dropdown.
        state_dict: App state with 'df' and 'metadata'.

    Returns:
        ``(figure_or_None, markdown_summary)``; the figure is None when no
        column is selected or the column matches no known type bucket.
    """
    if not column_name or not state_dict:
        return None, "Select a column to analyze."

    df = state_dict['df']
    metadata = state_dict['metadata']

    fig = None
    summary = ""

    if column_name in metadata['numeric_cols']:
        fig = make_subplots(rows=1, cols=2, subplot_titles=("Histogram", "Box Plot"))
        fig.add_trace(go.Histogram(x=df[column_name], name="Histogram"), row=1, col=1)
        fig.add_trace(go.Box(y=df[column_name], name="Box Plot"), row=1, col=2)
        fig.update_layout(title_text=f"Distribution of '{column_name}'", showlegend=False)
        summary = df[column_name].describe().to_frame().to_markdown()

    elif column_name in metadata['categorical_cols']:
        top_n = 20  # cap bars so high-cardinality columns stay readable
        counts = df[column_name].value_counts()
        title = f"Top {min(top_n, len(counts))} Categories for '{column_name}'"
        fig = px.bar(counts.nlargest(top_n), title=title, labels={'index': column_name, 'value': 'Count'})
        fig.update_layout(showlegend=False)
        summary = counts.to_frame().to_markdown()

    elif column_name in metadata['datetime_cols']:
        # Bucket records per calendar month for the trend line.
        counts = df[column_name].dt.to_period("M").value_counts().sort_index()
        fig = px.line(x=counts.index.to_timestamp(), y=counts.values, title=f"Records over Time for '{column_name}'")
        fig.update_layout(xaxis_title="Time", yaxis_title="Record Count")
        # BUG FIX: describe(datetime_is_numeric=True) was removed in
        # pandas 2.0 and raised TypeError for every datetime column.
        # Modern pandas summarizes datetimes natively; keep the legacy
        # keyword only as a fallback for pandas < 2.0 environments.
        try:
            summary = df[column_name].describe().to_frame().to_markdown()
        except Exception:
            summary = df[column_name].describe(datetime_is_numeric=True).to_frame().to_markdown()

    return fig, summary
|
216 |
+
|
217 |
+
# --- Tab 3: Bivariate Analysis ---
|
218 |
+
|
219 |
+
def generate_bivariate_plot(x_col, y_col, state_dict):
    """Generates plots to explore the relationship between two variables.

    The chart type follows the numeric/categorical combination of the two
    selections: scatter with an OLS trendline (numeric/numeric), horizontal
    box plots with the numeric variable on the x-axis (mixed), or a count
    heatmap (categorical/categorical — datetimes fall in this bucket too).

    Returns:
        ``(figure_or_None, markdown_summary)``.
    """
    # Guard clauses: need two distinct selections and loaded data.
    if not x_col or not y_col or not state_dict:
        return None, "Select two columns to analyze."
    if x_col == y_col:
        return None, "Please select two different columns."

    frame = state_dict['df']
    numeric = set(state_dict['metadata']['numeric_cols'])
    x_is_num = x_col in numeric
    y_is_num = y_col in numeric
    title = f"{x_col} vs. {y_col}"

    if x_is_num and y_is_num:
        fig = px.scatter(frame, x=x_col, y=y_col, title=f"Scatter Plot: {title}",
                         trendline="ols", trendline_color_override="red")
        summary = frame[[x_col, y_col]].corr().to_markdown()
    elif x_is_num != y_is_num:
        # Mixed pair: normalize so the numeric column is always on the
        # x-axis, giving horizontal box plots for either selection order.
        num_col, cat_col = (x_col, y_col) if x_is_num else (y_col, x_col)
        fig = px.box(frame, x=num_col, y=cat_col, title=f"Box Plot: {title}")
        summary = frame.groupby(cat_col)[num_col].describe().to_markdown()
    else:
        # Both categorical: cross-tabulated counts as a heatmap.
        crosstab = pd.crosstab(frame[x_col], frame[y_col])
        fig = px.imshow(crosstab, title=f"Heatmap of Counts: {title}", text_auto=True)
        summary = crosstab.to_markdown()

    return fig, f"### Analysis Summary\n{summary}"
|
250 |
+
|
251 |
+
# --- Tab 4: Time Series Analysis ---
|
252 |
+
|
253 |
+
def generate_time_series_plot(time_col, value_col, resample_freq, state_dict):
    """Generates a time series plot with resampling.

    Args:
        time_col: Column to parse as the datetime index.
        value_col: Column whose per-period mean is plotted.
        resample_freq: Pandas offset alias from the radio ('D','W','M','Q','Y').
        state_dict: App state holding the DataFrame.

    Returns:
        ``(figure_or_None, status_or_error_message)``.
    """
    if not time_col or not value_col or not state_dict:
        return None, "Select Time and Value columns."

    df = state_dict['df'].copy()

    try:
        df[time_col] = pd.to_datetime(df[time_col])
        indexed = df.set_index(time_col)[value_col]
        try:
            df_resampled = indexed.resample(resample_freq).mean().reset_index()
        except ValueError:
            # ROBUSTNESS: 'M'/'Q'/'Y' aliases are deprecated since pandas 2.2
            # and rejected by newer versions in favor of 'ME'/'QE'/'YE'.
            # Retry with the modern spelling so the hard-coded radio choices
            # keep working; anything else genuinely invalid re-raises into
            # the outer handler.
            modern = {'M': 'ME', 'Q': 'QE', 'Y': 'YE'}
            if resample_freq not in modern:
                raise
            df_resampled = indexed.resample(modern[resample_freq]).mean().reset_index()

        fig = px.line(df_resampled, x=time_col, y=value_col,
                      title=f"Time Series of {value_col} (Resampled to '{resample_freq}')")
        fig.update_layout(xaxis_title="Date", yaxis_title=f"Mean of {value_col}")
        return fig, f"Showing mean of '{value_col}' aggregated by '{resample_freq}'."
    except Exception as e:
        # Non-numeric value columns, unparsable dates, etc. surface here.
        return None, f"Error: {e}"
|
270 |
+
|
271 |
+
# --- Tab 5: AI Analyst Chat ---
|
272 |
+
|
273 |
+
def respond_to_chat(user_message, history, state_dict, api_key):
    """Handles the chat interaction with the AI Analyst.

    Flow: validate prerequisites -> ask Gemini for a JSON plan of the form
    ``{"thought": ..., "code": ...}`` -> run the returned code via
    ``safe_exec`` against the loaded DataFrame -> render stdout and/or a
    plotly figure back to the chat.

    Args:
        user_message: Question typed by the user.
        history: Chatbot history as a list of (user, bot) message tuples.
        state_dict: App state with 'df', 'metadata' and 'filename'.
        api_key: Gemini API key from the textbox on the AI Overview tab.

    Returns:
        ``(history, figure_or_None, "")`` — the trailing empty string
        clears the input textbox.
    """
    # Guard clauses: answer locally (no API call) when prerequisites fail.
    if not api_key:
        history.append((user_message, "I can't answer without a Gemini API key. Please enter it in the 'AI Overview' tab."))
        return history, None, ""

    if not state_dict:
        history.append((user_message, "Please upload a dataset before asking questions."))
        return history, None, ""

    # Placeholder entry; replaced in-place (history[-1]) once the reply is ready.
    history.append((user_message, None))

    df_metadata = state_dict['metadata']

    # Construct a robust prompt for the AI.
    # Only metadata is sent — never the DataFrame contents.
    prompt = f"""
    You are an AI Data Analyst assistant. Your name is 'Gemini Analyst'.
    You are given a pandas DataFrame named `df`.
    Your goal is to answer the user's question about this DataFrame by writing and executing Python code.

    **Instructions:**
    1. Analyze the user's question.
    2. Write Python code to answer it.
    3. You can use pandas, numpy, and plotly.express.
    4. If you create a plot, you **MUST** assign it to a variable named `fig`. The plot will be displayed to the user.
    5. If you are just calculating something or printing text, the `print()` output will be shown.
    6. **DO NOT** write any code that modifies the DataFrame (e.g., `df.dropna(inplace=True)`). Use `df.copy()` if you need to modify data.
    7. Respond **ONLY** with a JSON object containing two keys: "thought" and "code".
       - "thought": A short, one-sentence explanation of your plan.
       - "code": A string containing the Python code to execute.

    **DataFrame Metadata:**
    - **Filename:** {state_dict['filename']}
    - **Shape:** {df_metadata['shape'][0]} rows, {df_metadata['shape'][1]} columns
    - **Columns and Data Types:**
    {df_metadata['dtypes']}

    ---
    **User Question:** "{user_message}"
    ---

    **Your JSON Response:**
    """

    try:
        genai.configure(api_key=api_key)
        model = genai.GenerativeModel('gemini-1.5-flash')
        response = model.generate_content(prompt)

        # Clean and parse the JSON response
        # (strips the Markdown code fences models often wrap JSON in;
        # json.loads failures fall through to the outer except).
        response_text = response.text.strip().replace("```json", "").replace("```", "")
        response_json = json.loads(response_text)

        thought = response_json.get("thought", "Thinking...")
        code_to_run = response_json.get("code", "")

        bot_message = f"π§ **Thought:** {thought}\n\n"

        # Execute the code with only the DataFrame and plotting/array libs
        # in scope. NOTE(review): exec-based — model output is untrusted
        # code; see the security note on safe_exec.
        local_vars = {'df': state_dict['df'], 'px': px, 'pd': pd, 'np': np}
        stdout, fig_result, error = safe_exec(code_to_run, local_vars)

        if error:
            bot_message += f"π₯ **Error:**\n```\n{error}\n```"
            history[-1] = (user_message, bot_message)
            return history, None, ""

        if stdout:
            bot_message += f"π **Output:**\n```\n{stdout}\n```"

        if not fig_result and not stdout:
            bot_message += "β Code executed successfully, but it produced no visible output."

        history[-1] = (user_message, bot_message)

        return history, fig_result, ""

    except Exception as e:
        # Catches API failures and invalid / non-JSON model replies alike.
        error_msg = f"An unexpected error occurred: {e}. The AI might have returned an invalid response. Please try rephrasing your question."
        logging.error(f"Chatbot error: {error_msg}")
        history[-1] = (user_message, error_msg)
        return history, None, ""
|
355 |
+
|
356 |
+
# --- Gradio Interface ---
|
357 |
+
|
358 |
+
def create_gradio_interface():
    """Builds and returns the full Gradio application interface.

    Layout: a top row (file upload + API key) above five tabs — AI
    Overview, Univariate, Bivariate, Time Series (hidden until a datetime
    column is detected) and AI Analyst Chat (hidden until data loads).
    All per-session data lives in a ``gr.State`` dict so concurrent users
    do not share DataFrames.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(title="π AI Data Explorer", theme=gr.themes.Soft()) as demo:
        # Global state to hold data: {'df', 'metadata', 'filename'} once loaded.
        global_state = gr.State({})

        # Header
        gr.Markdown("# π AI Data Explorer: Your Advanced Analytic Tool")
        gr.Markdown("Upload a CSV, then explore your data with interactive tabs and a powerful AI Analyst.")

        # --- Top Row: File Upload and API Key ---
        with gr.Row():
            with gr.Column(scale=2):
                file_input = gr.File(label="π Upload CSV File", file_types=[".csv"])
                status_output = gr.Markdown("Status: Waiting for file...")
            with gr.Column(scale=1):
                api_key_input = gr.Textbox(
                    label="π Gemini API Key",
                    placeholder="Enter your key here...",
                    type="password",
                    info="Get your free key from Google AI Studio"
                )

        # --- Main Tabs ---
        with gr.Tabs() as tabs:
            # Tab 1: AI Overview
            with gr.Tab("π€ AI Overview", id=0):
                overview_btn = gr.Button("π§ Generate AI Overview", variant="primary")
                with gr.Row():
                    story_output = gr.Markdown(label="π AI-Generated Story")
                    with gr.Column():
                        basic_info_output = gr.Markdown(label="π Basic Information")
                        quality_score = gr.Number(label="π― Data Quality Score (%)", interactive=False)

            # Tab 2: Univariate Analysis
            # (dropdowns start non-interactive; enabled by load_and_process_file)
            with gr.Tab("π Univariate Analysis", id=1):
                uni_col_select = gr.Dropdown(label="Select a Column to Analyze", interactive=False)
                with gr.Row():
                    uni_plot_output = gr.Plot(label="Distribution Plot")
                    uni_summary_output = gr.Markdown(label="Summary Statistics")

            # Tab 3: Bivariate Analysis
            with gr.Tab("π Bivariate Analysis", id=2):
                with gr.Row():
                    bi_x_select = gr.Dropdown(label="Select X-Axis Column", interactive=False)
                    bi_y_select = gr.Dropdown(label="Select Y-Axis Column", interactive=False)
                bi_btn = gr.Button("π¨ Generate Bivariate Plot", variant="secondary")
                with gr.Row():
                    bi_plot_output = gr.Plot(label="Relationship Plot")
                    bi_summary_output = gr.Markdown(label="Analysis Summary")

            # Tab 4: Time Series (conditionally visible)
            with gr.Tab("β³ Time Series Analysis", id=3, visible=False) as ts_tab:
                with gr.Row():
                    ts_time_col = gr.Dropdown(label="Select Time Column", interactive=False)
                    ts_value_col = gr.Dropdown(label="Select Value Column", interactive=False)
                    ts_resample = gr.Radio(['D', 'W', 'M', 'Q', 'Y'], label="Resample Frequency", value='M')
                ts_btn = gr.Button("π Plot Time Series", variant="secondary")
                ts_plot_output = gr.Plot(label="Time Series Plot")
                ts_status_output = gr.Markdown()

            # Tab 5: AI Analyst Chat (conditionally visible)
            with gr.Tab("π¬ AI Analyst Chat", id=4, visible=False) as chat_tab:
                chatbot = gr.Chatbot(label="Chat with Gemini Analyst", height=500)
                chat_plot_output = gr.Plot(label="AI Generated Plot")
                with gr.Row():
                    chat_input = gr.Textbox(label="Your Question", placeholder="e.g., 'Show me the distribution of age'", scale=4)
                    chat_submit_btn = gr.Button("Submit", variant="primary", scale=1)
                chat_clear_btn = gr.Button("Clear Chat")

        # --- Event Handlers ---

        # File upload triggers data loading and UI updates
        # (nine outputs: load_and_process_file must always return nine values).
        file_input.upload(
            fn=load_and_process_file,
            inputs=[file_input, global_state],
            outputs=[global_state, status_output, uni_col_select, bi_x_select, bi_y_select, ts_value_col, ts_time_col, ts_tab, chat_tab]
        )

        # Tab 1: Overview
        overview_btn.click(
            fn=analyze_dataset_overview,
            inputs=[global_state, api_key_input],
            outputs=[story_output, basic_info_output, quality_score]
        )

        # Tab 2: Univariate — re-plots on every dropdown change, no button.
        uni_col_select.change(
            fn=generate_univariate_plot,
            inputs=[uni_col_select, global_state],
            outputs=[uni_plot_output, uni_summary_output]
        )

        # Tab 3: Bivariate
        bi_btn.click(
            fn=generate_bivariate_plot,
            inputs=[bi_x_select, bi_y_select, global_state],
            outputs=[bi_plot_output, bi_summary_output]
        )

        # Tab 4: Time Series
        ts_btn.click(
            fn=generate_time_series_plot,
            inputs=[ts_time_col, ts_value_col, ts_resample, global_state],
            outputs=[ts_plot_output, ts_status_output]
        )

        # Tab 5: AI Chat — button click and textbox Enter share one handler.
        chat_submit_btn.click(
            fn=respond_to_chat,
            inputs=[chat_input, chatbot, global_state, api_key_input],
            outputs=[chatbot, chat_plot_output, chat_input]
        )
        chat_input.submit(
            fn=respond_to_chat,
            inputs=[chat_input, chatbot, global_state, api_key_input],
            outputs=[chatbot, chat_plot_output, chat_input]
        )
        # Reset chat history, the AI plot, and the input box.
        chat_clear_btn.click(lambda: ([], None, ""), None, [chatbot, chat_plot_output, chat_input])


    return demo
|
480 |
+
|
481 |
+
# --- Main Application Execution ---
if __name__ == "__main__":
    # For local development, you might use load_dotenv()
    # load_dotenv()
    app = create_gradio_interface()
    # debug=True surfaces tracebacks in the browser; disable for production.
    app.launch(debug=True)
|