Spaces:
Sleeping
Sleeping
File size: 6,453 Bytes
dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 0a40e29 5f67bb9 0a40e29 5f67bb9 29aad45 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 8a0173b dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 0a40e29 dc51ef8 0a40e29 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
"""app.py — BizIntel AI Ultra (Gemini‑only, v3)
A production‑grade BI copilot with:
• CSV / Excel / Parquet and live SQL ingestion
• Optional sample‑only loading (first 5 M rows, uploads ≤2 GB) & datetime dtype auto‑fix
• Instant schema audit + Gemini‑generated insights
• Drill‑down EDA (histogram with box marginal, correlation heat‑map)
• Auto‑detected datetime + user‑tunable ARIMA forecasting
• One‑click strategy brief (Markdown)
"""
from __future__ import annotations
import os, io, tempfile
from pathlib import Path
from typing import List
import pandas as pd
import streamlit as st
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from sqlalchemy import create_engine
import google.generativeai as genai
# ─────────────────── 0 · CONFIG & SECRETS ────────────────────
# Page config has to be the first Streamlit command on the page, so issue it
# before the st.error() below gets a chance to run.
st.set_page_config("BizIntel AI Ultra", "📊", "wide", initial_sidebar_state="expanded")

# Accessing st.secrets raises when no secrets.toml exists (FileNotFoundError,
# or a subclass of it depending on the Streamlit version — TODO confirm for
# the pinned release). Without this guard the environment-variable fallback
# would never be consulted on a machine that only sets the env var.
try:
    API_KEY = st.secrets.get("GEMINI_APIKEY")
except FileNotFoundError:
    API_KEY = None
API_KEY = API_KEY or os.getenv("GEMINI_APIKEY")

if not API_KEY:
    st.error("❌ `GEMINI_APIKEY` missing — add it in *Settings → Secrets* or env vars.")
    st.stop()

genai.configure(api_key=API_KEY)
GEM_MODEL = "gemini-1.5-pro-latest"  # model name used by ask_gemini()
TMP = Path(tempfile.gettempdir())  # system temp directory
# ─────────────────── 1 · UTILITY HELPERS ─────────────────────
@st.cache_data(show_spinner=False)
def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
    """Load an uploaded CSV, Excel or Parquet file into a DataFrame.

    Args:
        buf: File-like object exposing a ``name`` attribute; the suffix of
            that name selects the parser (anything that is not Excel or
            Parquet is read as CSV).
        sample: When true, keep at most the first 5,000,000 rows, whatever
            the file format.

    Returns:
        The parsed data.
    """
    max_rows = 5_000_000 if sample else None
    suffix = Path(buf.name).suffix.lower()

    if suffix in {".xls", ".xlsx"}:
        # openpyxl only understands the XML-based .xlsx format; for legacy
        # .xls let pandas choose its own engine instead of forcing a failure.
        engine = "openpyxl" if suffix == ".xlsx" else None
        return pd.read_excel(buf, engine=engine, nrows=max_rows)

    if suffix == ".parquet":
        frame = pd.read_parquet(buf)
        # read_parquet has no row-limit argument, so trim after loading.
        return frame if max_rows is None else frame.head(max_rows)

    return pd.read_csv(buf, nrows=max_rows)
@st.cache_data(show_spinner=False)
def sql_tables(uri: str) -> List[str]:
    """Return the table names available at the SQLAlchemy database *uri*.

    Args:
        uri: SQLAlchemy connection URI.

    Returns:
        Table names reported by the database inspector for the default schema.
    """
    # Local import keeps this block self-contained; ``Engine.table_names()``
    # was deprecated in SQLAlchemy 1.4 and removed in 2.0, the inspector is
    # the supported replacement.
    from sqlalchemy import inspect

    engine = create_engine(uri)
    try:
        return inspect(engine).get_table_names()
    finally:
        # Release pooled connections; the engine is not reused after this call.
        engine.dispose()
@st.cache_data(show_spinner=True)
def read_table(uri: str, tbl: str) -> pd.DataFrame:
    """Read the whole SQL table *tbl* from the database at *uri*.

    Args:
        uri: SQLAlchemy connection URI.
        tbl: Name of the table to load.

    Returns:
        The table contents as a DataFrame.
    """
    engine = create_engine(uri)
    try:
        return pd.read_sql_table(tbl, engine)
    finally:
        # The engine is created per call, so close its connection pool here
        # instead of leaving connections open until garbage collection.
        engine.dispose()
@st.cache_data(show_spinner=False)
def ask_gemini(prompt: str) -> str:
    """Send *prompt* to the Gemini model named by ``GEM_MODEL``.

    Returns the text of the generated reply with surrounding whitespace
    stripped.
    """
    model = genai.GenerativeModel(GEM_MODEL)
    reply = model.generate_content(prompt)
    return reply.text.strip()
# ─────────────────── 2 · DATA INGESTION ──────────────────────
st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")

# The sidebar decides where the data comes from; DF stays empty until one of
# the two branches below manages to load something.
mode = st.sidebar.radio("Source", ["File", "SQL"], horizontal=True)
DF: pd.DataFrame = pd.DataFrame()

if mode == "File":
    upl = st.sidebar.file_uploader(
        "Upload CSV / Excel / Parquet",
        type=["csv", "xls", "xlsx", "parquet"],
        help="≤2 GB",
    )
    sample = st.sidebar.checkbox("Load sample only (≤ 5 M rows)")
    if upl:
        DF = read_file(upl, sample)
else:
    uri = st.sidebar.text_input("SQLAlchemy URI")
    # The table picker is only shown once a URI has been entered.
    tbl = st.sidebar.selectbox("Table", sql_tables(uri)) if uri else None
    if tbl:
        DF = read_table(uri, tbl)

# Nothing loaded yet: prompt the user and end this script run.
if DF.empty:
    st.info("⬅️ Load data to start.")
    st.stop()

st.success("✅ Data loaded")
preview = DF.head()
st.dataframe(preview, use_container_width=True)
# ─────────────────── 3 · QUICK STATS + GEMINI INSIGHT ────────
# Headline numbers: shape and the share of missing cells across the frame.
rows, cols = DF.shape
miss = DF.isna().sum().sum() / (rows * cols) * 100
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss:.1f}")

st.subheader("🧠 Gemini Insights")
with st.spinner("Gemini analysing…"):
    try:
        stats = DF.describe(include="all", datetime_is_numeric=True)
    except TypeError:
        # pandas 2.0 removed ``datetime_is_numeric`` (datetimes are always
        # summarised numerically there), so passing it raises TypeError.
        stats = DF.describe(include="all")
    # Only the summary statistics are sent to the model, not the raw rows.
    summary = stats.round(2).to_json()
    st.markdown(ask_gemini(
        "You are a senior BI analyst. Give 5 concise insights and 3 action items for the dataset: " + summary
    ))
# ─────────────────── 4 · TIME‑SERIES SELECTION ───────────────
# Attempt datetime coercion on text columns only. Numeric columns are skipped
# on purpose: pd.to_datetime() accepts plain numbers (reading them as epoch
# offsets), which would turn every metric into a timestamp and leave no
# numeric column to plot.
for c in DF.columns:
    column = DF[c]
    if not (pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)):
        continue
    try:
        DF[c] = pd.to_datetime(column)
    except (ValueError, TypeError, OverflowError):
        # Not date-like; keep the column as it was.
        continue

date_cols = [c for c in DF.columns if pd.api.types.is_datetime64_any_dtype(DF[c])]
num_cols = DF.select_dtypes("number").columns.tolist()
if not date_cols or not num_cols:
    # Without both kinds of column the selectboxes would return None and the
    # column lookup below would fail with a KeyError.
    st.warning("Time-series analysis needs at least one datetime column and one numeric column.")
    st.stop()

DATE_COL = st.selectbox("Date column", date_cols)
METRIC_COL = st.selectbox("Numeric metric", num_cols)

# One value per timestamp: average the metric over duplicate dates, oldest first.
ts = (
    DF[[DATE_COL, METRIC_COL]].dropna()
    .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
)

# Plot from an explicit two-column frame so the axis labels can be set by
# column name (the series index is named after DATE_COL, never "index").
fig_ts = px.line(
    ts.reset_index(),
    x=DATE_COL,
    y=METRIC_COL,
    title=f"{METRIC_COL} Trend",
    labels={DATE_COL: "Date"},
)
st.plotly_chart(fig_ts, use_container_width=True)
# ─────────────────── 5 · FORECASTING ─────────────────────────
st.subheader("🔮 Forecast")
steps = st.slider("Horizon", 3, 365, 90)
# ARIMA order (p, d, q), each tunable by the user.
p = st.number_input("p", 0, 5, 1)
d = st.number_input("d", 0, 2, 1)
q = st.number_input("q", 0, 5, 1)

# pd.infer_freq() raises ValueError when given fewer than three timestamps and
# returns None for irregular spacing; fall back to daily steps in both cases.
try:
    freq = pd.infer_freq(ts.index)
except (ValueError, TypeError):
    freq = None
freq = freq or "D"

with st.spinner("Fitting ARIMA…"):
    model = ARIMA(ts, order=(p, d, q)).fit()
    # Future timestamps start one step after the last observation.
    fut_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=freq)[1:]
    # The forecast may come back labelled with its own index (e.g. integer
    # positions when the series has no regular frequency). Handing that
    # Series to pd.Series(..., index=fut_idx) would realign by label and
    # produce all-NaN values, so attach the raw values to fut_idx instead.
    predicted = pd.Series(model.forecast(steps)).to_numpy()
    forecast = pd.Series(predicted, index=fut_idx, name="Forecast")

fig_fc = px.line(pd.concat([ts, forecast], axis=1), title="Actual vs Forecast")
st.plotly_chart(fig_fc, use_container_width=True)
# ─────────────────── 6 · EDA EXPANDERS ───────────────────────
st.subheader("🔍 EDA Dashboard")

with st.expander("Histogram / Box"):
    # Offer every numeric column; the original passed the METRIC_COL string
    # itself as the options, so the other columns could never be chosen.
    hist_options = DF.select_dtypes("number").columns.tolist()
    default_pos = hist_options.index(METRIC_COL) if METRIC_COL in hist_options else 0
    col = st.selectbox("Column", hist_options, index=default_pos, key="hist")
    if col is not None:
        st.plotly_chart(
            px.histogram(DF, x=col, marginal="box", template="plotly_dark"),
            use_container_width=True,
        )

with st.expander("Correlation heat‑map"):
    # Pairwise correlation of the numeric columns only.
    corr = DF.select_dtypes("number").corr()
    st.plotly_chart(
        px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation"),
        use_container_width=True,
    )
# ─────────────────── 7 · STRATEGY BRIEF DOWNLOAD ────────────
# Static Markdown brief offered as a download; one entry per output line.
brief = "\n".join([
    "# Strategy Brief",
    "* Clean missing timestamps.",
    "* Investigate strongest correlations for causal drivers.",
    "* Use forecast to guide inventory & staffing planning.",
    "* Review outliers weekly (>3σ).",
    "* Segment analysis by region & product for micro‑actions.",
])
st.download_button(
    label="⬇️ Strategy (.md)",
    data=brief,
    file_name="bizintel_brief.md",
    mime="text/markdown",
)
|