# Spaces: Sleeping  (status banner captured from the hosting page; not part of app.py)
"""app.py — BizIntel AI Ultra (Gemini‑only, v3)

A production‑grade BI copilot with:
• CSV / Excel / Parquet and live SQL ingestion
• Memory‑safe chunk loading (≥2 GB) & dtype auto‑fix
• Instant schema audit + Gemini‑generated insights
• Drill‑down EDA (histogram, violin, scatter‑matrix, heat‑map)
• Auto‑detected datetime + user‑tunable ARIMA forecasting
• One‑click strategy brief (Markdown)
"""
from __future__ import annotations | |
import os, io, tempfile | |
from pathlib import Path | |
from typing import List | |
import pandas as pd | |
import streamlit as st | |
import plotly.express as px | |
from statsmodels.tsa.arima.model import ARIMA | |
from sqlalchemy import create_engine | |
import google.generativeai as genai | |
# ─────────────────── 0 · CONFIG & SECRETS ────────────────────
# Streamlit documents `set_page_config` as having to be the first Streamlit
# command on a page, so it runs before anything that may render output
# (the missing-key error below, or a secrets-loading message).
st.set_page_config("BizIntel AI Ultra", "📊", "wide", initial_sidebar_state="expanded")


def _load_api_key(name: str = "GEMINI_APIKEY") -> "str | None":
    """Return the API key stored under *name*, or ``None`` when it is not set.

    Streamlit secrets take precedence; the environment variable of the same
    name is the fallback.
    """
    try:
        secret = st.secrets.get(name)
    except FileNotFoundError:
        # NOTE(review): Streamlit raises a FileNotFoundError-derived error when
        # no secrets.toml exists — confirm against the deployed version.
        # Without this guard the env-var fallback below is never reached.
        secret = None
    return secret or os.getenv(name)


API_KEY = _load_api_key()
if not API_KEY:
    st.error("❌ `GEMINI_APIKEY` missing — add it in *Settings → Secrets* or env vars.")
    st.stop()
genai.configure(api_key=API_KEY)
GEM_MODEL = "gemini-1.5-pro-latest"
TMP = Path(tempfile.gettempdir())
# ─────────────────── 1 · UTILITY HELPERS ───────────────────── | |
def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
    """Load an uploaded CSV, Excel or Parquet file into a DataFrame.

    The format is chosen from the extension of ``buf.name``; anything other
    than ``.xls`` / ``.xlsx`` / ``.parquet`` is parsed as CSV.

    Args:
        buf: Binary file-like object exposing a ``name`` attribute.
        sample: When true, cap the result at 5,000,000 rows.

    Returns:
        The parsed DataFrame.
    """
    row_cap = 5_000_000 if sample else None
    suffix = Path(buf.name).suffix.lower()
    if suffix in {".xls", ".xlsx"}:
        # openpyxl cannot read legacy .xls workbooks, so only force it for
        # .xlsx and let pandas choose the reader for .xls.
        engine = "openpyxl" if suffix == ".xlsx" else None
        return pd.read_excel(buf, engine=engine, nrows=row_cap)
    if suffix == ".parquet":
        frame = pd.read_parquet(buf)
        # pd.read_parquet takes no `nrows` argument, so trim after loading.
        return frame if row_cap is None else frame.head(row_cap)
    return pd.read_csv(buf, nrows=row_cap)
def sql_tables(uri: str) -> List[str]:
    """Return the names of the tables in the database reachable at *uri*.

    Args:
        uri: SQLAlchemy connection URI.

    Returns:
        Table names as reported by SQLAlchemy's inspector.
    """
    # `Engine.table_names()` was deprecated in SQLAlchemy 1.4 and removed in
    # 2.0; the Inspector API is available on both.
    from sqlalchemy import inspect

    engine = create_engine(uri)
    try:
        return inspect(engine).get_table_names()
    finally:
        # The engine is only needed for this one lookup; release its pool.
        engine.dispose()
def read_table(uri: str, tbl: str) -> pd.DataFrame:
    """Read the SQL table *tbl* from the database at *uri* into a DataFrame."""
    engine = create_engine(uri)
    return pd.read_sql_table(tbl, engine)
def ask_gemini(prompt: str) -> str:
    """Send *prompt* to the ``GEM_MODEL`` Gemini model and return its reply text, stripped."""
    model = genai.GenerativeModel(GEM_MODEL)
    response = model.generate_content(prompt)
    return response.text.strip()
# ─────────────────── 2 · DATA INGESTION ──────────────────────
# DF starts empty and is only replaced once a source has actually been read.
DF: pd.DataFrame = pd.DataFrame()
st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")
mode = st.sidebar.radio("Source", ["File", "SQL"], horizontal=True)
if mode != "File":
    # SQL source: ask for a connection URI, then let the user pick a table.
    uri = st.sidebar.text_input("SQLAlchemy URI")
    if uri:
        tbl = st.sidebar.selectbox("Table", sql_tables(uri))
        if tbl:
            DF = read_table(uri, tbl)
else:
    # File source: upload widget plus an optional row cap.
    upl = st.sidebar.file_uploader(
        "Upload CSV / Excel / Parquet",
        ["csv", "xls", "xlsx", "parquet"],
        help="≤2 GB",
    )
    sample = st.sidebar.checkbox("Load sample only (≤ 5 M rows)")
    if upl:
        DF = read_file(upl, sample)

# Nothing loaded yet: show a hint and end this script run here.
if DF.empty:
    st.info("⬅️ Load data to start.")
    st.stop()

st.success("✅ Data loaded")
st.dataframe(DF.head(), use_container_width=True)
# ─────────────────── 3 · QUICK STATS + GEMINI INSIGHT ────────
rows, cols = DF.shape
# Share of missing cells across the whole frame, as a percentage.
miss = DF.isna().sum().sum() / (rows * cols) * 100
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss:.1f}")
st.subheader("🧠 Gemini Insights")
with st.spinner("Gemini analysing…"):
    try:
        stats = DF.describe(include="all", datetime_is_numeric=True)
    except TypeError:
        # pandas ≥ 2.0 removed `datetime_is_numeric` (datetimes are always
        # summarised numerically there), so retry without the keyword.
        stats = DF.describe(include="all")
    # The JSON summary is what gets sent to Gemini.
    summary = stats.round(2).to_json()
    st.markdown(ask_gemini(
        "You are a senior BI analyst. Give 5 concise insights and 3 action items for the dataset: " + summary
    ))
# ─────────────────── 4 · TIME‑SERIES SELECTION ───────────────
# Attempt datetime coercion on text-like columns only. `pd.to_datetime` also
# accepts numeric columns (reading them as epoch offsets), which would turn
# every metric into a timestamp and leave nothing numeric to analyse.
for c in DF.columns:
    if not (pd.api.types.is_object_dtype(DF[c]) or pd.api.types.is_string_dtype(DF[c])):
        continue
    try:
        DF[c] = pd.to_datetime(DF[c])
    except (ValueError, TypeError):
        # Column does not parse as dates — leave it untouched.
        continue

date_cols = [c for c in DF.columns if pd.api.types.is_datetime64_any_dtype(DF[c])]
num_cols = list(DF.select_dtypes("number").columns)
if not date_cols or not num_cols:
    # Without both a date axis and a metric the selections below would be
    # None and the indexing would fail, so stop with an explanation instead.
    st.warning("⚠️ Time‑series analysis needs at least one datetime column and one numeric column.")
    st.stop()

DATE_COL = st.selectbox("Date column", date_cols)
METRIC_COL = st.selectbox("Numeric metric", num_cols)
# One value per timestamp: mean of the metric, ordered chronologically.
ts = (
    DF[[DATE_COL, METRIC_COL]].dropna()
    .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
)
fig_ts = px.line(ts, title=f"{METRIC_COL} Trend", labels={"index": "Date", METRIC_COL: METRIC_COL})
st.plotly_chart(fig_ts, use_container_width=True)
# ─────────────────── 5 · FORECASTING ─────────────────────────
st.subheader("🔮 Forecast")
steps = st.slider("Horizon", 3, 365, 90)
# ARIMA order (p, d, q), user-tunable.
p = st.number_input("p", 0, 5, 1)
d = st.number_input("d", 0, 2, 1)
q = st.number_input("q", 0, 5, 1)
with st.spinner("Fitting ARIMA…"):
    model = ARIMA(ts, order=(p, d, q)).fit()
    try:
        freq = pd.infer_freq(ts.index)
    except ValueError:
        # infer_freq needs at least 3 timestamps; fall back to daily below.
        freq = None
    # Future dates start one step after the last observation.
    fut_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=freq or "D")[1:]
    # Pass plain values: building a Series from another Series with `index=`
    # reindexes instead of relabelling, which yields all-NaN whenever the
    # model's own forecast index differs from `fut_idx`.
    forecast = pd.Series(list(model.forecast(steps)), index=fut_idx)
fig_fc = px.line(pd.concat([ts, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
st.plotly_chart(fig_fc, use_container_width=True)
# ─────────────────── 6 · EDA EXPANDERS ───────────────────────
st.subheader("🔍 EDA Dashboard")
with st.expander("Histogram / Box"):
    # `options` must be a collection of column names, not a single string:
    # offer every numeric column and preselect the time-series metric.
    hist_cols = list(DF.select_dtypes("number").columns)
    hist_default = hist_cols.index(METRIC_COL) if METRIC_COL in hist_cols else 0
    col = st.selectbox("Column", hist_cols, index=hist_default, key="hist")
    st.plotly_chart(px.histogram(DF, x=col, marginal="box", template="plotly_dark"), use_container_width=True)
with st.expander("Correlation heat‑map"):
    # Pairwise correlation over the numeric columns only.
    corr = DF.select_dtypes("number").corr()
    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation"), use_container_width=True)
# ─────────────────── 7 · STRATEGY BRIEF DOWNLOAD ────────────
# Static Markdown brief: a heading followed by one bullet per recommendation.
brief = "".join((
    "# Strategy Brief\n",
    "* Clean missing timestamps.\n",
    "* Investigate strongest correlations for causal drivers.\n",
    "* Use forecast to guide inventory & staffing planning.\n",
    "* Review outliers weekly (>3σ).\n",
    "* Segment analysis by region & product for micro‑actions.",
))
# Offer the brief as a downloadable .md file.
st.download_button("⬇️ Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")