# Source: BizIntel_AI / app.py (Hugging Face Space file view)
# Author: mgbam — commit "Update app.py" (dc51ef8, verified)
# Size: 6.45 kB
"""app.py — BizIntel AI Ultra (Gemini‑only, v3)
A production‑grade BI copilot with:
• CSV / Excel / Parquet and live SQL ingestion
• Memory‑safe chunk loading (≥2 GB) & dtype auto‑fix
• Instant schema audit + Gemini‑generated insights
• Drill‑down EDA (histogram, violin, scatter‑matrix, heat‑map)
• Auto‑detected datetime + user‑tunable ARIMA forecasting
• One‑click strategy brief (Markdown)
"""
from __future__ import annotations
import os, io, tempfile
from pathlib import Path
from typing import List
import pandas as pd
import streamlit as st
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from sqlalchemy import create_engine
import google.generativeai as genai
# ─────────────────── 0 · CONFIG & SECRETS ────────────────────
# Page configuration goes first: the secrets lookup and the missing-key error
# below can both render Streamlit elements, and `set_page_config` is expected
# to precede every other Streamlit call in the script.
st.set_page_config("BizIntel AI Ultra", "📊", "wide", initial_sidebar_state="expanded")

# Reading `st.secrets` fails with a FileNotFoundError-derived error when no
# secrets file exists (NOTE(review): confirm for the deployed Streamlit
# version). Treat that as "no secret" so the environment-variable fallback
# stays reachable.
try:
    API_KEY = st.secrets.get("GEMINI_APIKEY")
except FileNotFoundError:
    API_KEY = None
API_KEY = API_KEY or os.getenv("GEMINI_APIKEY")
if not API_KEY:
    st.error("❌ `GEMINI_APIKEY` missing — add it in *Settings → Secrets* or env vars.")
    st.stop()
genai.configure(api_key=API_KEY)
GEM_MODEL = "gemini-1.5-pro-latest"  # model name passed to genai.GenerativeModel
TMP = Path(tempfile.gettempdir())  # system temporary directory
# ─────────────────── 1 · UTILITY HELPERS ─────────────────────
@st.cache_data(show_spinner=False)
def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
    """Load an uploaded CSV, Excel or Parquet file into a DataFrame.

    The format is picked from the extension of ``buf.name``; anything that is
    not ``.xls``, ``.xlsx`` or ``.parquet`` is parsed as CSV.

    Args:
        buf: File-like object exposing a ``name`` attribute.
        sample: When true, CSV input is capped at 5,000,000 rows. Excel and
            Parquet input is always read in full.

    Returns:
        The parsed DataFrame.
    """
    suf = Path(buf.name).suffix.lower()
    if suf == ".xlsx":
        return pd.read_excel(buf, engine="openpyxl")
    if suf == ".xls":
        # openpyxl only reads the newer Office Open XML formats; leave the
        # engine unset so pandas selects a reader for the legacy .xls format.
        return pd.read_excel(buf)
    if suf == ".parquet":
        return pd.read_parquet(buf)
    return pd.read_csv(buf, nrows=5_000_000 if sample else None)
@st.cache_data(show_spinner=False)
def sql_tables(uri: str) -> List[str]:
    """Return the table names available in the database at ``uri``.

    Args:
        uri: SQLAlchemy connection URI.

    Returns:
        Table names reported by the SQLAlchemy inspector.
    """
    # Local import keeps this block self-contained; `Engine.table_names()` was
    # removed in SQLAlchemy 2.0, the inspector API works on 1.x and 2.x.
    from sqlalchemy import inspect as sa_inspect

    engine = create_engine(uri)
    try:
        return sa_inspect(engine).get_table_names()
    finally:
        # Release pooled connections; the engine is only needed for this lookup.
        engine.dispose()
@st.cache_data(show_spinner=True)
def read_table(uri: str, tbl: str) -> pd.DataFrame:
    """Read the whole SQL table ``tbl`` from the database at ``uri``.

    Args:
        uri: SQLAlchemy connection URI.
        tbl: Name of the table to load.

    Returns:
        The table contents as a DataFrame.
    """
    engine = create_engine(uri)
    frame = pd.read_sql_table(table_name=tbl, con=engine)
    return frame
@st.cache_data(show_spinner=False)
def ask_gemini(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return its reply.

    Args:
        prompt: Full text prompt to submit.

    Returns:
        The response text with surrounding whitespace removed.
    """
    gemini = genai.GenerativeModel(GEM_MODEL)
    reply = gemini.generate_content(prompt)
    return reply.text.strip()
# ─────────────────── 2 · DATA INGESTION ──────────────────────
st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")
mode = st.sidebar.radio("Source", ["File", "SQL"], horizontal=True)
# Start from an empty frame so the emptiness check below also covers the case
# where nothing has been selected yet.
DF: pd.DataFrame = pd.DataFrame()
if mode != "File":
    # SQL source: ask for a connection URI, then for one of its tables.
    uri = st.sidebar.text_input("SQLAlchemy URI")
    if uri:
        tbl = st.sidebar.selectbox("Table", sql_tables(uri))
        if tbl:
            DF = read_table(uri, tbl)
else:
    # File source: upload plus an optional row cap (applied by read_file).
    upl = st.sidebar.file_uploader(
        "Upload CSV / Excel / Parquet",
        ["csv", "xls", "xlsx", "parquet"],
        help="≤2 GB",
    )
    sample = st.sidebar.checkbox("Load sample only (≤ 5 M rows)")
    if upl:
        DF = read_file(upl, sample)

# Nothing loaded: show the hint and halt this script run.
if DF.empty:
    st.info("⬅️ Load data to start.")
    st.stop()

st.success("✅ Data loaded")
st.dataframe(DF.head(), use_container_width=True)
# ─────────────────── 3 · QUICK STATS + GEMINI INSIGHT ────────
# Headline numbers: row count, column count and overall share of missing cells.
rows, cols = DF.shape
miss = DF.isna().sum().sum() / (rows * cols) * 100
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss:.1f}")
st.subheader("🧠 Gemini Insights")
with st.spinner("Gemini analysing…"):
    try:
        stats = DF.describe(include="all", datetime_is_numeric=True)
    except TypeError:
        # pandas 2.0 removed the `datetime_is_numeric` keyword; retry without
        # it so the summary still works on newer pandas.
        stats = DF.describe(include="all")
    # The JSON-encoded summary table is what gets sent to the model.
    summary = stats.round(2).to_json()
    st.markdown(ask_gemini(
        "You are a senior BI analyst. Give 5 concise insights and 3 action items for the dataset: " + summary
    ))
# ─────────────────── 4 · TIME‑SERIES SELECTION ───────────────
# Attempt datetime coercion on text-like columns only. `pd.to_datetime` also
# accepts plain numbers (read as epoch offsets), so coercing every column would
# turn the numeric metrics into datetimes and leave nothing to plot.
for c in DF.columns:
    if pd.api.types.is_object_dtype(DF[c]) or pd.api.types.is_string_dtype(DF[c]):
        try:
            DF[c] = pd.to_datetime(DF[c])
        except (ValueError, TypeError, OverflowError):
            # Not parseable as dates — keep the column unchanged.
            pass

date_options = [c for c in DF.columns if pd.api.types.is_datetime64_any_dtype(DF[c])]
metric_options = list(DF.select_dtypes("number").columns)
if not date_options or not metric_options:
    # Without both columns the selectboxes would return None and the
    # column lookups below would fail.
    st.warning("Time-series analysis needs at least one datetime column and one numeric column.")
    st.stop()

DATE_COL = st.selectbox("Date column", date_options)
METRIC_COL = st.selectbox("Numeric metric", metric_options)
# Mean of the metric per timestamp, in chronological order.
ts = (
    DF[[DATE_COL, METRIC_COL]].dropna()
    .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
)
fig_ts = px.line(ts, title=f"{METRIC_COL} Trend", labels={"index": "Date", METRIC_COL: METRIC_COL})
st.plotly_chart(fig_ts, use_container_width=True)
# ─────────────────── 5 · FORECASTING ─────────────────────────
st.subheader("🔮 Forecast")
steps = st.slider("Horizon", 3, 365, 90)
# ARIMA order (p, d, q), each chosen by the user within a small range.
p = st.number_input("p", 0, 5, 1)
d = st.number_input("d", 0, 2, 1)
q = st.number_input("q", 0, 5, 1)
with st.spinner("Fitting ARIMA…"):
    model = ARIMA(ts, order=(p, d, q)).fit()

# `infer_freq` raises instead of returning None for very short or
# non-datetime indexes; fall back to daily steps in that case as well.
try:
    freq = pd.infer_freq(ts.index)
except (TypeError, ValueError):
    freq = None
# Build steps+1 stamps starting at the last observation, then drop that first one.
fut_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=freq or "D")[1:]

# Use the raw forecast values: the result may carry its own index, and
# `pd.Series(series, index=...)` realigns on labels, which produces NaN
# wherever the forecast's labels differ from `fut_idx`.
forecast = pd.Series(pd.Series(model.forecast(steps)).to_numpy(), index=fut_idx)
fig_fc = px.line(pd.concat([ts, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
st.plotly_chart(fig_fc, use_container_width=True)
# ─────────────────── 6 · EDA EXPANDERS ───────────────────────
st.subheader("🔍 EDA Dashboard")
with st.expander("Histogram / Box"):
    # Offer every numeric column. Passing the bare METRIC_COL string as the
    # options gave the widget a string where it needs a list of choices.
    numeric_cols = list(DF.select_dtypes("number").columns)
    # Preselect the metric chosen for the time series when it is available.
    default_ix = numeric_cols.index(METRIC_COL) if METRIC_COL in numeric_cols else 0
    col = st.selectbox("Column", numeric_cols, index=default_ix, key="hist")
    if col is not None:
        st.plotly_chart(px.histogram(DF, x=col, marginal="box", template="plotly_dark"), use_container_width=True)
with st.expander("Correlation heat‑map"):
    # Pairwise correlation across the numeric columns.
    corr = DF.select_dtypes("number").corr()
    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation"), use_container_width=True)
# ─────────────────── 7 · STRATEGY BRIEF DOWNLOAD ────────────
# Static Markdown brief offered as a download; the segments are concatenated
# unchanged into a single document.
brief = "".join(
    [
        "# Strategy Brief\n",
        "* Clean missing timestamps.\n",
        "* Investigate strongest correlations for causal drivers.\n",
        "* Use forecast to guide inventory & staffing planning.\n",
        "* Review outliers weekly (>3σ).\n",
        "* Segment analysis by region & product for micro‑actions.",
    ]
)
st.download_button("⬇️ Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")