File size: 6,639 Bytes
b5d6aaa
dc51ef8
b5d6aaa
 
 
 
 
 
5f67bb9
b5d6aaa
5f67bb9
dc51ef8
5f67bb9
dc51ef8
5f67bb9
0a40e29
5f67bb9
0a40e29
 
5f67bb9
 
29aad45
b5d6aaa
dc51ef8
 
b5d6aaa
dc51ef8
 
b5d6aaa
dc51ef8
5f67bb9
dc51ef8
 
b5d6aaa
5f67bb9
dc51ef8
 
b5d6aaa
dc51ef8
 
 
 
5f67bb9
 
dc51ef8
 
5f67bb9
 
dc51ef8
 
5f67bb9
 
dc51ef8
 
 
b5d6aaa
 
 
 
dc51ef8
b5d6aaa
 
 
5f67bb9
b5d6aaa
8a0173b
dc51ef8
 
b5d6aaa
5f67bb9
b5d6aaa
5f67bb9
b5d6aaa
 
5f67bb9
 
b5d6aaa
 
dc51ef8
b5d6aaa
 
 
 
5f67bb9
 
b5d6aaa
5f67bb9
 
b5d6aaa
 
dc51ef8
b5d6aaa
dc51ef8
 
b5d6aaa
 
 
 
5f67bb9
b5d6aaa
 
5f67bb9
b5d6aaa
 
0a40e29
b5d6aaa
 
dc51ef8
 
b5d6aaa
dc51ef8
0a40e29
b5d6aaa
5f67bb9
b5d6aaa
 
 
 
 
 
 
 
dc51ef8
b5d6aaa
5f67bb9
 
b5d6aaa
 
 
 
 
 
 
 
 
dc51ef8
b5d6aaa
5f67bb9
 
b5d6aaa
 
 
 
 
5f67bb9
b5d6aaa
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""app.py โ€” BizIntel AI Ultra (Geminiโ€‘only, v4)
A production-grade BI copilot with:
 • CSV / Excel / Parquet and SQL ingestion
 • Smart sampling + memory-safe loading for large files
 • Schema + missing-data audit with Gemini-generated insights
 • Drill-down EDA (histogram, violin, scatter-matrix, heatmap)
 • Auto-detected date column, tunable ARIMA forecasting
 • One-click strategy brief download (Markdown)
"""

from __future__ import annotations
import os, io, tempfile
from pathlib import Path
from typing import List

import pandas as pd
import streamlit as st
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from sqlalchemy import create_engine
import google.generativeai as genai

# ───────────────────── 0 · CONFIGURATION ─────────────────────
# Page config is issued before any other Streamlit call so that the
# missing-key error page below is rendered with the same title/icon/layout.
st.set_page_config("BizIntel\u00a0AI\u00a0Ultra", "📊", "wide", initial_sidebar_state="expanded")

# Resolve the Gemini key from Streamlit secrets first, then the environment.
try:
    API_KEY = st.secrets.get("GEMINI_APIKEY")
except FileNotFoundError:
    # Without a secrets.toml, touching `st.secrets` raises instead of
    # returning None, which would make the env-var fallback unreachable.
    # NOTE(review): newer Streamlit raises StreamlitSecretNotFoundError, which
    # is believed to subclass FileNotFoundError — confirm for the pinned version.
    API_KEY = None
API_KEY = API_KEY or os.getenv("GEMINI_APIKEY")
if not API_KEY:
    st.error("❌ Missing `GEMINI_APIKEY` — add it in Settings → Secrets or set env variable.")
    st.stop()

genai.configure(api_key=API_KEY)
GEM_MODEL = "gemini-1.5-pro-latest"  # model name passed to genai.GenerativeModel
TMP = Path(tempfile.gettempdir())    # system temp directory

# ───────────────────── 1 · UTILITY HELPERS ───────────────────
@st.cache_data(show_spinner=False)
def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
    """Load an uploaded CSV, Excel or Parquet file into a DataFrame.

    The parser is chosen from the lower-cased extension of ``buf.name``;
    anything that is not ``.xls``, ``.xlsx`` or ``.parquet`` is read as CSV.

    Args:
        buf: File-like object exposing a ``name`` attribute.
        sample: When true, CSV reads are capped at 5,000,000 rows. The flag
            has no effect on Excel or Parquet input.

    Returns:
        The parsed DataFrame.
    """
    suf = Path(buf.name).suffix.lower()
    if suf == ".xlsx":
        return pd.read_excel(buf, engine="openpyxl")
    if suf == ".xls":
        # openpyxl only understands the OOXML formats; legacy .xls workbooks
        # need pandas to select its own engine instead of a forced openpyxl.
        return pd.read_excel(buf)
    if suf == ".parquet":
        return pd.read_parquet(buf)
    return pd.read_csv(buf, nrows=5_000_000 if sample else None)

@st.cache_data(show_spinner=False)
def sql_tables(uri: str) -> List[str]:
    """Return the table names visible through the given SQLAlchemy URI.

    Args:
        uri: SQLAlchemy database URL.

    Returns:
        Table names reported by the database inspector.
    """
    # Imported locally because the module header only pulls in `create_engine`.
    from sqlalchemy import inspect

    engine = create_engine(uri)
    try:
        # `Engine.table_names()` was removed in SQLAlchemy 2.0; the inspector
        # API is available in both the 1.x and 2.x series.
        return inspect(engine).get_table_names()
    finally:
        # Release pooled connections held by this short-lived engine.
        engine.dispose()

@st.cache_data(show_spinner=True)
def read_table(uri: str, tbl: str) -> pd.DataFrame:
    """Read an entire SQL table into a DataFrame.

    Args:
        uri: SQLAlchemy database URL used to build the engine.
        tbl: Name of the table to load.

    Returns:
        The table contents as returned by ``pandas.read_sql_table``.
    """
    engine = create_engine(uri)
    frame = pd.read_sql_table(table_name=tbl, con=engine)
    return frame

@st.cache_data(show_spinner=False)
def ask_gemini(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return its reply.

    Args:
        prompt: Full text of the request.

    Returns:
        The response text with leading/trailing whitespace removed.
    """
    gemini = genai.GenerativeModel(GEM_MODEL)
    reply = gemini.generate_content(prompt)
    answer = reply.text
    return answer.strip()

# ───────────────────── 2 · DATA INGESTION ────────────────────
# Lets the user pick a source in the sidebar (file upload or SQL table) and
# binds the result to the module-level `df` used by every later section.
st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")
mode = st.sidebar.radio("Select Data Source", ["Upload File", "SQL Database"], horizontal=True)
df: pd.DataFrame = pd.DataFrame()

try:
    if mode == "Upload File":
        upl = st.sidebar.file_uploader(
            "Upload CSV / Excel / Parquet", ["csv", "xls", "xlsx", "parquet"], help="≤2\u202fGB"
        )
        sample = st.sidebar.checkbox("Load sample (≤ 5M rows)")
        if upl:
            df = read_file(upl, sample)
    else:
        uri = st.sidebar.text_input("SQLAlchemy URI")
        if uri:
            tbl = st.sidebar.selectbox("Choose Table", sql_tables(uri))
            if tbl:
                df = read_table(uri, tbl)
except Exception as exc:  # UI boundary: the URI and file contents are user input
    # Show a readable message instead of a raw traceback when the file cannot
    # be parsed or the database cannot be reached.
    st.error(f"Could not load data: {exc}")
    st.stop()

# Nothing selected yet (or the source was empty): prompt and halt the script.
if df.empty:
    st.info("⬅️ Load a dataset to get started.")
    st.stop()

st.success("✅ Data loaded")
st.dataframe(df.head(), use_container_width=True)

# ───────────────────── 3 · SUMMARY + GEMINI ──────────────────
# Headline metrics: row count, column count and overall share of missing cells.
rows, cols = df.shape
# The ingestion step stops the script on an empty frame, so rows * cols > 0.
miss_pct = df.isna().sum().sum() / (rows * cols) * 100
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss_pct:.1f}")

st.subheader("🧠 Gemini Insights")
with st.spinner("Generating analysis..."):
    try:
        described = df.describe(include="all", datetime_is_numeric=True)
    except TypeError:
        # pandas 2.0 removed `datetime_is_numeric` (datetimes are always
        # summarised numerically there), so the keyword is rejected.
        described = df.describe(include="all")
    summary = described.round(2).to_json()
    st.markdown(ask_gemini(
        "You are a senior BI analyst. List 5 key insights and 3 action items based on this dataset: " + summary
    ))

# ───────────────────── 4 · TIME SERIES SETUP ─────────────────
# Try datetime coercion on text-like columns only. `pd.to_datetime` also
# accepts plain numbers (read as offsets from the epoch), so coercing every
# column would silently turn all numeric metrics into timestamps and leave
# nothing to forecast.
for c in df.columns:
    column = df[c]
    if not (pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)):
        continue
    try:
        df[c] = pd.to_datetime(column)
    except (ValueError, TypeError, OverflowError):
        # Not parseable as dates — keep the column unchanged.
        continue

date_cols = [c for c in df.columns if pd.api.types.is_datetime64_any_dtype(df[c])]
num_cols = list(df.select_dtypes("number").columns)
if not date_cols or not num_cols:
    # With an empty option list the selectbox yields None and the column
    # lookup below would fail with a KeyError.
    st.warning("Time-series analysis needs at least one date column and one numeric column.")
    st.stop()

DATE_COL = st.selectbox("Date column", date_cols)
METRIC_COL = st.selectbox("Numeric metric", num_cols)

# One value per timestamp: mean of the metric, ordered chronologically.
series = (
    df[[DATE_COL, METRIC_COL]].dropna()
      .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
)
fig_ts = px.line(series, title=f"{METRIC_COL} Trend", labels={"index": "Date", METRIC_COL: METRIC_COL})
st.plotly_chart(fig_ts, use_container_width=True)

# ───────────────────── 5 · ARIMA FORECASTING ─────────────────
# Fits ARIMA(p, d, q) on `series` and plots the history next to the forecast.
st.subheader("🔮 Forecast")
steps = st.slider("Forecast Horizon", 3, 365, 90)
p = st.number_input("AR Order (p)", 0, 5, 1)
d = st.number_input("Diff Order (d)", 0, 2, 1)
q = st.number_input("MA Order (q)", 0, 5, 1)

if len(series) < 3:
    # `pd.infer_freq` raises ValueError when given fewer than three dates.
    st.warning("Need at least 3 dated observations to fit a forecast.")
else:
    with st.spinner("Training ARIMA model..."):
        model = ARIMA(series, order=(p, d, q)).fit()
        # Future timestamps start one step after the last observation; fall
        # back to daily spacing when no regular frequency can be inferred.
        fut_idx = pd.date_range(
            series.index[-1], periods=steps + 1, freq=pd.infer_freq(series.index) or "D"
        )[1:]
        # Strip the forecast's own index before attaching `fut_idx`: building a
        # Series from another Series plus `index=` re-indexes by label, which
        # yields all-NaN whenever the two indexes do not coincide.
        forecast = pd.Series(model.forecast(steps).to_numpy(), index=fut_idx)
    fig_fc = px.line(pd.concat([series, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
    st.plotly_chart(fig_fc, use_container_width=True)

# ───────────────────── 6 · EDA TOOLS ─────────────────────────
st.subheader("🔍 Exploratory Data Dashboard")
with st.expander("Histogram + Box"):
    # Offer every numeric column, pre-selecting the forecast metric. Passing the
    # column-name string itself as `options` gives at best a single choice and,
    # where Streamlit iterates the string, a list of its characters.
    numeric_cols = list(df.select_dtypes("number").columns)
    default_idx = numeric_cols.index(METRIC_COL) if METRIC_COL in numeric_cols else 0
    col = st.selectbox("Metric column", numeric_cols, index=default_idx, key="hist")
    if col is not None:
        st.plotly_chart(px.histogram(df, x=col, marginal="box", template="plotly_dark"), use_container_width=True)

with st.expander("Correlation Heatmap"):
    # Pairwise correlation across numeric columns only.
    corr = df.select_dtypes("number").corr()
    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation Matrix"), use_container_width=True)

# ───────────────────── 7 · STRATEGY DOWNLOAD ─────────────────
# Static Markdown brief offered as a download; the text does not depend on
# the loaded dataset. Mis-encoded characters (σ, button emoji) are restored.
brief = (
    "# Strategy Brief\n"
    "* Clean missing date values for better time modeling.\n"
    "* Investigate top correlations for potential drivers.\n"
    "* Leverage forecast for inventory and staff planning.\n"
    "* Watch for outliers >3σ weekly.\n"
    "* Segment by region and product for precise actions."
)
st.download_button("⬇️ Download Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")