Spaces:

mgbam
/

BizIntel_AI

Sleeping

App Files Files Community

BizIntel_AI / app.py

mgbam

Update app.py

dc51ef8 verified about 1 month ago

raw

history blame

6.45 kB

	"""app.py — BizIntel AI Ultra (Gemini‑only, v3)
	A production‑grade BI copilot with:
	• CSV / Excel / Parquet and live SQL ingestion
	• Memory‑safe chunk loading (≥2 GB) & dtype auto‑fix
	• Instant schema audit + Gemini‑generated insights
	• Drill‑down EDA (histogram, violin, scatter‑matrix, heat‑map)
	• Auto‑detected datetime + user‑tunable ARIMA forecasting
	• One‑click strategy brief (Markdown)
	"""
	from __future__ import annotations
	import os, io, tempfile
	from pathlib import Path
	from typing import List

	import pandas as pd
	import streamlit as st
	import plotly.express as px
	from statsmodels.tsa.arima.model import ARIMA
	from sqlalchemy import create_engine
	import google.generativeai as genai

	# ─────────────────── 0 · CONFIG & SECRETS ────────────────────
	# Page config goes first: Streamlit documents set_page_config as the first
	# Streamlit command of a script, and the missing-key path below calls
	# st.error before anything else would otherwise have configured the page.
	st.set_page_config("BizIntel AI Ultra", "📊", "wide", initial_sidebar_state="expanded")

	# Prefer the Streamlit secret, then fall back to the environment variable.
	# NOTE(review): reading st.secrets raises when no secrets.toml exists; the
	# error is expected to derive from FileNotFoundError — confirm against the
	# deployed Streamlit version.
	try:
	    API_KEY = st.secrets.get("GEMINI_APIKEY")
	except FileNotFoundError:
	    API_KEY = None
	API_KEY = API_KEY or os.getenv("GEMINI_APIKEY")
	if not API_KEY:
	    st.error("❌ `GEMINI_APIKEY` missing — add it in Settings → Secrets or env vars.")
	    st.stop()

	genai.configure(api_key=API_KEY)
	GEM_MODEL = "gemini-1.5-pro-latest"  # model name passed to genai.GenerativeModel
	TMP = Path(tempfile.gettempdir())  # system temp directory

	# ─────────────────── 1 · UTILITY HELPERS ─────────────────────
	@st.cache_data(show_spinner=False)
	def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
	    """Load an uploaded CSV, Excel or Parquet file into a DataFrame.

	    The format is chosen from the suffix of ``buf.name``; anything that is
	    not Excel or Parquet is parsed as CSV.

	    Args:
	        buf: File-like object exposing a ``name`` attribute.
	        sample: When true, read at most 5,000,000 rows (CSV and Excel only;
	            Parquet is always read in full).

	    Returns:
	        The parsed DataFrame.
	    """
	    row_cap = 5_000_000 if sample else None
	    suffix = Path(buf.name).suffix.lower()
	    if suffix == ".xlsx":
	        return pd.read_excel(buf, engine="openpyxl", nrows=row_cap)
	    if suffix == ".xls":
	        # openpyxl cannot open legacy .xls workbooks; let pandas pick the
	        # engine appropriate for the format instead of forcing openpyxl.
	        return pd.read_excel(buf, nrows=row_cap)
	    if suffix == ".parquet":
	        return pd.read_parquet(buf)
	    return pd.read_csv(buf, nrows=row_cap)

	@st.cache_data(show_spinner=False)
	def sql_tables(uri: str) -> List[str]:
	    """Return the table names visible through the SQLAlchemy URI ``uri``.

	    Uses the Inspector API because ``Engine.table_names()`` was deprecated
	    in SQLAlchemy 1.4 and removed in 2.0.
	    """
	    # Local import keeps this block self-contained; sqlalchemy is already a
	    # dependency of the file (create_engine).
	    from sqlalchemy import inspect

	    engine = create_engine(uri)
	    try:
	        return list(inspect(engine).get_table_names())
	    finally:
	        # The engine is only needed for this lookup; release its pool.
	        engine.dispose()

	@st.cache_data(show_spinner=True)
	def read_table(uri: str, tbl: str) -> pd.DataFrame:
	    """Read the whole table ``tbl`` from the database at ``uri``.

	    A new engine is created from ``uri`` on each (uncached) call and handed
	    to ``pandas.read_sql_table``.
	    """
	    engine = create_engine(uri)
	    frame = pd.read_sql_table(tbl, engine)
	    return frame

	@st.cache_data(show_spinner=False)
	def ask_gemini(prompt: str) -> str:
	    """Send ``prompt`` to the model named by ``GEM_MODEL`` and return its text.

	    The response text is returned with surrounding whitespace stripped.
	    """
	    model = genai.GenerativeModel(GEM_MODEL)
	    response = model.generate_content(prompt)
	    return response.text.strip()

	# ─────────────────── 2 · DATA INGESTION ──────────────────────
	st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")
	mode = st.sidebar.radio("Source", ["File", "SQL"], horizontal=True)
	# DF stays empty until one of the two sources below yields data.
	DF: pd.DataFrame = pd.DataFrame()

	if mode == "File":
	    uploaded = st.sidebar.file_uploader(
	        "Upload CSV / Excel / Parquet",
	        ["csv", "xls", "xlsx", "parquet"],
	        help="≤2 GB",
	    )
	    sample_only = st.sidebar.checkbox("Load sample only (≤ 5 M rows)")
	    if uploaded:
	        DF = read_file(uploaded, sample_only)
	else:
	    db_uri = st.sidebar.text_input("SQLAlchemy URI")
	    if db_uri:
	        # The table list is only queried once a URI has been entered.
	        table_name = st.sidebar.selectbox("Table", sql_tables(db_uri))
	        if table_name:
	            DF = read_table(db_uri, table_name)

	# Nothing loaded yet: prompt the user and end this script run here.
	if DF.empty:
	    st.info("⬅️ Load data to start.")
	    st.stop()

	st.success("✅ Data loaded")
	preview = DF.head()
	st.dataframe(preview, use_container_width=True)

	# ─────────────────── 3 · QUICK STATS + GEMINI INSIGHT ────────
	rows, cols = DF.shape
	# Missing cells as a percentage of all cells. rows * cols is non-zero here
	# because the DF.empty guard above already stopped the run for empty frames.
	miss = DF.isna().sum().sum() / (rows * cols) * 100
	c1, c2, c3 = st.columns(3)
	c1.metric("Rows", f"{rows:,}")
	c2.metric("Columns", cols)
	c3.metric("Missing %", f"{miss:.1f}")

	st.subheader("🧠 Gemini Insights")
	with st.spinner("Gemini analysing…"):
	    try:
	        # pandas 1.x needs this flag to summarise datetimes numerically.
	        stats = DF.describe(include="all", datetime_is_numeric=True)
	    except TypeError:
	        # pandas >= 2.0 removed the flag; numeric treatment is the default.
	        stats = DF.describe(include="all")
	    summary = stats.round(2).to_json()
	    st.markdown(ask_gemini(
	        "You are a senior BI analyst. Give 5 concise insights and 3 action items for the dataset: " + summary
	    ))

	# ─────────────────── 4 · TIME‑SERIES SELECTION ───────────────
	# Attempt datetime coercion on text-like columns only. Numeric columns are
	# skipped on purpose: pd.to_datetime accepts plain numbers (as epoch
	# offsets), which would turn every metric into a timestamp and leave no
	# numeric column to choose below.
	for c in DF.columns:
	    if not (pd.api.types.is_object_dtype(DF[c]) or pd.api.types.is_string_dtype(DF[c])):
	        continue
	    try:
	        DF[c] = pd.to_datetime(DF[c])
	    except (ValueError, TypeError, OverflowError):
	        # Not parseable as dates — keep the column unchanged.
	        pass

	date_options = [c for c in DF.columns if pd.api.types.is_datetime64_any_dtype(DF[c])]
	metric_options = list(DF.select_dtypes("number").columns)
	if not date_options or not metric_options:
	    # Without both a date axis and a metric there is nothing to plot or
	    # forecast; stop instead of failing on a None selection.
	    st.warning("Time-series analysis needs at least one datetime column and one numeric column.")
	    st.stop()

	DATE_COL = st.selectbox("Date column", date_options)
	METRIC_COL = st.selectbox("Numeric metric", metric_options)

	# One value per timestamp: the mean of the metric, in chronological order.
	ts = (
	    DF[[DATE_COL, METRIC_COL]].dropna()
	    .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
	)
	fig_ts = px.line(ts, title=f"{METRIC_COL} Trend", labels={"index": "Date", METRIC_COL: METRIC_COL})
	st.plotly_chart(fig_ts, use_container_width=True)

	# ─────────────────── 5 · FORECASTING ─────────────────────────
	st.subheader("🔮 Forecast")
	steps = st.slider("Horizon", 3, 365, 90)
	# ARIMA order (p, d, q), each chosen by the user.
	p = st.number_input("p", 0, 5, 1)
	d = st.number_input("d", 0, 2, 1)
	q = st.number_input("q", 0, 5, 1)

	# pd.infer_freq raises ValueError when given fewer than three timestamps;
	# fall back to daily spacing in that case, as when no frequency is found.
	try:
	    inferred_freq = pd.infer_freq(ts.index)
	except ValueError:
	    inferred_freq = None

	with st.spinner("Fitting ARIMA…"):
	    model = ARIMA(ts, order=(p, d, q)).fit()
	    # Build steps + 1 dates starting at the last observation and drop the
	    # first, so the forecast begins one period after the data ends.
	    fut_idx = pd.date_range(ts.index[-1], periods=steps + 1, freq=inferred_freq or "D")[1:]
	    # Take the forecast values positionally. Passing the forecast Series
	    # straight to pd.Series(..., index=fut_idx) would reindex by label and
	    # produce NaN wherever its own index differs from fut_idx.
	    forecast = pd.Series(list(model.forecast(steps)), index=fut_idx)

	fig_fc = px.line(pd.concat([ts, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
	st.plotly_chart(fig_fc, use_container_width=True)

	# ─────────────────── 6 · EDA EXPANDERS ───────────────────────
	st.subheader("🔍 EDA Dashboard")
	with st.expander("Histogram / Box"):
	    # Offer every numeric column, pre-selecting the metric chosen above.
	    # (Passing the single string METRIC_COL as the options gave no real
	    # choice of column.)
	    numeric_cols = list(DF.select_dtypes("number").columns)
	    default_pos = numeric_cols.index(METRIC_COL) if METRIC_COL in numeric_cols else 0
	    col = st.selectbox("Column", numeric_cols, index=default_pos, key="hist")
	    if col is not None:
	        st.plotly_chart(px.histogram(DF, x=col, marginal="box", template="plotly_dark"), use_container_width=True)
	with st.expander("Correlation heat‑map"):
	    # Pairwise correlation across the numeric columns only.
	    corr = DF.select_dtypes("number").corr()
	    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation"), use_container_width=True)

	# ─────────────────── 7 · STRATEGY BRIEF DOWNLOAD ────────────
	# Static Markdown brief offered as a download; one entry per line, joined
	# with newlines and no trailing newline.
	brief = "\n".join((
	    "# Strategy Brief",
	    "* Clean missing timestamps.",
	    "* Investigate strongest correlations for causal drivers.",
	    "* Use forecast to guide inventory & staffing planning.",
	    "* Review outliers weekly (>3σ).",
	    "* Segment analysis by region & product for micro‑actions.",
	))
	st.download_button("⬇️ Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")