File size: 6,453 Bytes
dc51ef8
 
 
 
 
 
 
 
5f67bb9
 
dc51ef8
5f67bb9
dc51ef8
5f67bb9
0a40e29
5f67bb9
0a40e29
 
5f67bb9
 
29aad45
dc51ef8
 
 
 
 
 
 
5f67bb9
dc51ef8
 
 
5f67bb9
dc51ef8
5f67bb9
dc51ef8
 
 
 
 
 
 
5f67bb9
 
dc51ef8
 
5f67bb9
 
dc51ef8
 
5f67bb9
 
dc51ef8
 
 
 
 
 
 
 
 
 
 
5f67bb9
dc51ef8
8a0173b
dc51ef8
 
 
5f67bb9
dc51ef8
5f67bb9
dc51ef8
 
5f67bb9
 
dc51ef8
 
 
 
 
 
5f67bb9
 
 
dc51ef8
5f67bb9
 
dc51ef8
 
 
 
 
 
 
 
 
 
5f67bb9
dc51ef8
5f67bb9
 
 
dc51ef8
 
0a40e29
dc51ef8
 
 
 
 
 
0a40e29
dc51ef8
5f67bb9
dc51ef8
 
 
 
 
 
 
5f67bb9
 
dc51ef8
 
 
 
 
 
 
 
 
 
5f67bb9
 
dc51ef8
 
 
 
 
5f67bb9
dc51ef8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""app.py — BizIntel AI Ultra (Gemini‑only, v3)
A production‑grade BI copilot with:
 • CSV / Excel / Parquet and live SQL ingestion
 • Memory‑safe chunk loading (≥2 GB) & dtype auto‑fix
 • Instant schema audit + Gemini‑generated insights
 • Drill‑down EDA (histogram, violin, scatter‑matrix, heat‑map)
 • Auto‑detected datetime + user‑tunable ARIMA forecasting
 • One‑click strategy brief (Markdown)
"""
from __future__ import annotations
import os, io, tempfile
from pathlib import Path
from typing import List

import pandas as pd
import streamlit as st
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from sqlalchemy import create_engine
import google.generativeai as genai

# ─────────────────── 0 · CONFIG & SECRETS ────────────────────
# Resolve the Gemini API key: Streamlit secrets first, then the environment.
try:
    API_KEY = st.secrets.get("GEMINI_APIKEY")
except FileNotFoundError:
    # Accessing st.secrets without any secrets file raises instead of
    # returning None, which would make the env-var fallback unreachable.
    # NOTE(review): assumes the deployed Streamlit raises FileNotFoundError
    # (or a subclass of it) in that situation — confirm for the pinned version.
    API_KEY = None
API_KEY = API_KEY or os.getenv("GEMINI_APIKEY")
if not API_KEY:
    st.error("❌ `GEMINI_APIKEY` missing — add it in *Settings → Secrets* or env vars.")
    st.stop()

genai.configure(api_key=API_KEY)
GEM_MODEL = "gemini-1.5-pro-latest"  # model name passed to genai.GenerativeModel
TMP = Path(tempfile.gettempdir())    # system temp directory

st.set_page_config("BizIntel AI Ultra", "📊", "wide", initial_sidebar_state="expanded")

# ─────────────────── 1 · UTILITY HELPERS ─────────────────────
@st.cache_data(show_spinner=False)
def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
    """Load an uploaded file into a DataFrame, choosing the parser by extension.

    Args:
        buf: File-like object exposing a ``name`` attribute; only the suffix
            of that name is used to pick the reader.
        sample: When true, CSV input is capped at 5,000,000 rows. Excel and
            Parquet input is always read in full.

    Returns:
        The parsed DataFrame.
    """
    suf = Path(buf.name).suffix.lower()
    if suf in {".xls", ".xlsx"}:  # Excel
        # openpyxl only understands the newer .xlsx format; forcing it on a
        # legacy .xls workbook fails. For .xls let pandas select the engine.
        engine = "openpyxl" if suf == ".xlsx" else None
        return pd.read_excel(buf, engine=engine)
    if suf == ".parquet":
        return pd.read_parquet(buf)
    # Every other extension is treated as CSV.
    return pd.read_csv(buf, nrows=5_000_000 if sample else None)

@st.cache_data(show_spinner=False)
def sql_tables(uri: str) -> List[str]:
    """Return the table names available in the database at ``uri``.

    Args:
        uri: SQLAlchemy connection URI.

    Returns:
        List of table names reported by the database inspector.
    """
    # Imported here so this function does not depend on the file-level
    # import list being extended.
    from sqlalchemy import inspect

    engine = create_engine(uri)
    try:
        # Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed
        # in 2.0; the inspector is the supported way to list tables.
        return inspect(engine).get_table_names()
    finally:
        # The engine is single-use; release its connection pool.
        engine.dispose()

@st.cache_data(show_spinner=True)
def read_table(uri: str, tbl: str) -> pd.DataFrame:
    """Read the whole SQL table ``tbl`` from the database at ``uri``.

    Args:
        uri: SQLAlchemy connection URI.
        tbl: Name of the table to load.

    Returns:
        The table contents as a DataFrame.
    """
    engine = create_engine(uri)
    try:
        return pd.read_sql_table(tbl, engine)
    finally:
        # The engine is created per call; dispose it so its connection pool
        # is not left open after the frame has been materialised.
        engine.dispose()

@st.cache_data(show_spinner=False)
def ask_gemini(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return its reply text.

    Args:
        prompt: Full prompt text forwarded to ``generate_content``.

    Returns:
        The response's ``text`` with surrounding whitespace stripped.
    """
    model = genai.GenerativeModel(GEM_MODEL)
    response = model.generate_content(prompt)
    reply = response.text
    return reply.strip()

# ─────────────────── 2 · DATA INGESTION ──────────────────────
st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")
mode = st.sidebar.radio("Source", ["File", "SQL"], horizontal=True)

# Start with an empty frame; it is replaced only once the chosen source
# actually yields data.
DF: pd.DataFrame = pd.DataFrame()

if mode != "File":
    # SQL source: ask for a connection URI, then for one of its tables.
    uri = st.sidebar.text_input("SQLAlchemy URI")
    if uri:
        tbl = st.sidebar.selectbox("Table", sql_tables(uri))
        if tbl:
            DF = read_table(uri, tbl)
else:
    # File source: uploader plus an opt-in row cap that is passed to read_file.
    upl = st.sidebar.file_uploader(
        "Upload CSV / Excel / Parquet",
        ["csv", "xls", "xlsx", "parquet"],
        help="≤2 GB",
    )
    sample = st.sidebar.checkbox("Load sample only (≤ 5 M rows)")
    if upl:
        DF = read_file(upl, sample)

# Nothing loaded (or the source was empty): prompt the user and halt this run.
if DF.empty:
    st.info("⬅️ Load data to start.")
    st.stop()

st.success("✅ Data loaded")
st.dataframe(DF.head(), use_container_width=True)

# ─────────────────── 3 · QUICK STATS + GEMINI INSIGHT ────────
rows, cols = DF.shape
# Percentage of missing cells over the whole frame.
miss = DF.isna().sum().sum() / (rows*cols) * 100
c1,c2,c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss:.1f}")

st.subheader("🧠 Gemini Insights")
with st.spinner("Gemini analysing…"):
    # `datetime_is_numeric` only exists on pandas 1.x; pandas 2.0 removed the
    # keyword and rejects it with TypeError, so retry without it.
    try:
        described = DF.describe(include="all", datetime_is_numeric=True)
    except TypeError:
        described = DF.describe(include="all")
    # The describe() table is serialised to JSON and appended to the prompt.
    summary = described.round(2).to_json()
    st.markdown(ask_gemini(
        "You are a senior BI analyst. Give 5 concise insights and 3 action items for the dataset: " + summary
    ))

# ─────────────────── 4 · TIME‑SERIES SELECTION ───────────────
# Attempt datetime coercion on text-like columns only. pd.to_datetime also
# accepts plain integers/floats (read as epoch offsets), so coercing every
# column would turn the numeric metrics into datetimes and leave nothing to
# pick as the metric below.
for c in DF.columns:
    column = DF[c]
    if not (pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)):
        continue
    try:
        DF[c] = pd.to_datetime(column)
    except (ValueError, TypeError, OverflowError):
        # Not parseable as dates — keep the column unchanged.
        pass

date_candidates = [c for c in DF.columns if pd.api.types.is_datetime64_any_dtype(DF[c])]
metric_candidates = list(DF.select_dtypes("number").columns)
if not date_candidates or not metric_candidates:
    # Without both columns the selectboxes return None and the groupby below
    # would fail; stop this run with an explanation instead.
    st.warning("Time‑series analysis needs at least one datetime column and one numeric column.")
    st.stop()

DATE_COL = st.selectbox("Date column", date_candidates)
METRIC_COL = st.selectbox("Numeric metric", metric_candidates)

# Mean of the metric per timestamp, in chronological order.
ts = (
    DF[[DATE_COL, METRIC_COL]].dropna()
      .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
)
fig_ts = px.line(ts, title=f"{METRIC_COL} Trend", labels={"index":"Date", METRIC_COL:METRIC_COL})
st.plotly_chart(fig_ts, use_container_width=True)

# ─────────────────── 5 · FORECASTING ─────────────────────────
st.subheader("🔮 Forecast")
steps = st.slider("Horizon", 3, 365, 90)
# ARIMA order (p, d, q), user-tunable.
p = st.number_input("p", 0, 5, 1)
d = st.number_input("d", 0, 2, 1)
q = st.number_input("q", 0, 5, 1)
with st.spinner("Fitting ARIMA…"):
    model = ARIMA(ts, order=(p, d, q)).fit()
    # pd.infer_freq raises when there are fewer than three timestamps (or the
    # index is not datetime-like); fall back to daily in that case as well as
    # when no regular frequency is detected.
    try:
        freq = pd.infer_freq(ts.index)
    except (ValueError, TypeError):
        freq = None
    # Future index starts one period after the last observation.
    fut_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=freq or "D")[1:]
    # Take the forecast values positionally: if the forecast comes back as an
    # indexed Series, pd.Series(series, index=...) would re-align on labels
    # and produce NaN wherever its index differs from fut_idx.
    forecast = pd.Series(list(model.forecast(steps)), index=fut_idx)
fig_fc = px.line(pd.concat([ts, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
st.plotly_chart(fig_fc, use_container_width=True)

# ─────────────────── 6 · EDA EXPANDERS ───────────────────────
st.subheader("🔍 EDA Dashboard")
with st.expander("Histogram / Box"):
    # Offer every numeric column, pre-selecting the metric chosen earlier.
    # Passing the bare METRIC_COL string as the options is not a list of
    # columns and gives the user nothing to choose from.
    hist_options = list(DF.select_dtypes("number").columns) or [METRIC_COL]
    default_ix = hist_options.index(METRIC_COL) if METRIC_COL in hist_options else 0
    col = st.selectbox("Column", hist_options, index=default_ix, key="hist")
    st.plotly_chart(px.histogram(DF, x=col, marginal="box", template="plotly_dark"), use_container_width=True)
with st.expander("Correlation heat‑map"):
    # Pairwise correlation across numeric columns only.
    corr = DF.select_dtypes("number").corr()
    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation"), use_container_width=True)

# ─────────────────── 7 · STRATEGY BRIEF DOWNLOAD ────────────
# Static Markdown brief: a heading followed by five bullet points, joined with
# newlines and no trailing newline.
brief = "\n".join(
    (
        "# Strategy Brief",
        "* Clean missing timestamps.",
        "* Investigate strongest correlations for causal drivers.",
        "* Use forecast to guide inventory & staffing planning.",
        "* Review outliers weekly (>3σ).",
        "* Segment analysis by region & product for micro‑actions.",
    )
)
st.download_button(
    label="⬇️ Strategy (.md)",
    data=brief,
    file_name="bizintel_brief.md",
    mime="text/markdown",
)