Spaces:
Sleeping
Sleeping
"""app.py โ BizIntel AI Ultra (Geminiโonly, v4) | |
A productionโgrade BI copilot with: | |
โข CSVโฏ/โฏExcelโฏ/โฏParquet and SQL ingestion | |
โข Smart sampling + memoryโsafe loading for large files | |
โข Schema + missing-data audit with Gemini-generated insights | |
โข Drill-down EDA (histogram, violin, scatter-matrix, heatmap) | |
โข Autoโdetected date column, tunable ARIMA forecasting | |
โข One-click strategy brief download (Markdown) | |
""" | |
from __future__ import annotations | |
import os, io, tempfile | |
from pathlib import Path | |
from typing import List | |
import pandas as pd | |
import streamlit as st | |
import plotly.express as px | |
from statsmodels.tsa.arima.model import ARIMA | |
from sqlalchemy import create_engine | |
import google.generativeai as genai | |
# โโโโโโโโโโโโโโโโโโโโโ 0โฏยทโฏCONFIGURATION โโโโโโโโโโโโโโโโโโโโโ | |
# Resolve the Gemini key from Streamlit secrets first, then the environment.
# Reading st.secrets raises when no secrets file exists (the exception class
# differs between Streamlit releases), so the lookup is guarded; otherwise
# the environment-variable fallback below would never be reached.
try:
    API_KEY = st.secrets.get("GEMINI_APIKEY")
except Exception:  # boundary guard: missing/unreadable secrets file
    API_KEY = None
API_KEY = API_KEY or os.getenv("GEMINI_APIKEY")

if not API_KEY:
    # Without a key nothing below can work, so halt the script run here.
    st.error("โ Missing `GEMINI_APIKEY` โ add it in Settings โ Secrets or set env variable.")
    st.stop()

st.set_page_config("BizIntelย AIย Ultra", "๐", "wide", initial_sidebar_state="expanded")
genai.configure(api_key=API_KEY)

GEM_MODEL = "gemini-1.5-pro-latest"  # model name passed to genai.GenerativeModel
TMP = Path(tempfile.gettempdir())  # system temp directory
# โโโโโโโโโโโโโโโโโโโโโ 1โฏยทโฏUTILITY HELPERS โโโโโโโโโโโโโโโโโโโ | |
def read_file(
    buf: io.BufferedReader, sample: bool = False, max_rows: int = 5_000_000
) -> pd.DataFrame:
    """Load an uploaded CSV, Excel, or Parquet file into a DataFrame.

    Args:
        buf: Binary file-like object exposing a ``name`` attribute; the
            extension of that name (case-insensitive) selects the parser.
        sample: When true, keep at most ``max_rows`` rows.
        max_rows: Row cap applied when ``sample`` is true.

    Returns:
        The parsed data. Any extension other than ``.xls``, ``.xlsx`` or
        ``.parquet`` is parsed as CSV.
    """
    suffix = Path(buf.name).suffix.lower()
    limit = max_rows if sample else None

    if suffix in {".xls", ".xlsx"}:
        # openpyxl cannot open legacy .xls workbooks; leave the engine unset
        # for those so pandas selects a reader that matches the file.
        engine = "openpyxl" if suffix == ".xlsx" else None
        return pd.read_excel(buf, engine=engine, nrows=limit)

    if suffix == ".parquet":
        frame = pd.read_parquet(buf)
        # read_parquet takes no row-limit argument, so the cap is applied
        # after loading.
        return frame if limit is None else frame.head(limit)

    return pd.read_csv(buf, nrows=limit)
def sql_tables(uri: str) -> List[str]:
    """Return the table names visible through the SQLAlchemy URI ``uri``.

    Uses the inspector API because ``Engine.table_names()`` was deprecated in
    SQLAlchemy 1.4 and removed in 2.0.

    Args:
        uri: SQLAlchemy connection URI.

    Returns:
        Table names in the database's default schema.
    """
    # Local import keeps this fix self-contained; sqlalchemy is already a
    # dependency of this file.
    from sqlalchemy import inspect

    engine = create_engine(uri)
    try:
        return inspect(engine).get_table_names()
    finally:
        # Release pooled connections; this engine is only used for the lookup.
        engine.dispose()
def read_table(uri: str, tbl: str) -> pd.DataFrame:
    """Read table ``tbl`` from the database at ``uri`` into a DataFrame.

    Args:
        uri: SQLAlchemy connection URI.
        tbl: Name of the table to read.

    Returns:
        The table contents as loaded by ``pd.read_sql_table``.
    """
    engine = create_engine(uri)
    frame = pd.read_sql_table(tbl, engine)
    return frame
def ask_gemini(prompt: str) -> str:
    """Send ``prompt`` to the ``GEM_MODEL`` Gemini model and return its reply.

    Args:
        prompt: Full prompt text to submit.

    Returns:
        The response text with surrounding whitespace stripped.
    """
    model = genai.GenerativeModel(GEM_MODEL)
    response = model.generate_content(prompt)
    reply = response.text
    return reply.strip()
# โโโโโโโโโโโโโโโโโโโโโ 2โฏยทโฏDATA INGESTION โโโโโโโโโโโโโโโโโโโโ | |
st.title("๐ BizIntel AI Ultra โ Gemini 1.5 Pro BI Copilot")

mode = st.sidebar.radio(
    "Select Data Source",
    ["Upload File", "SQL Database"],
    horizontal=True,
)

# Starts empty; replaced only once one of the sources below yields data.
df: pd.DataFrame = pd.DataFrame()

if mode != "Upload File":
    # SQL path: ask for a connection URI, then let the user pick a table.
    uri = st.sidebar.text_input("SQLAlchemy URI")
    if uri:
        tbl = st.sidebar.selectbox("Choose Table", sql_tables(uri))
        if tbl:
            df = read_table(uri, tbl)
else:
    # File path: uploader plus an opt-in row cap handled by read_file.
    upl = st.sidebar.file_uploader(
        "Upload CSV / Excel / Parquet",
        ["csv", "xls", "xlsx", "parquet"],
        help="โค2โฏGB",
    )
    sample = st.sidebar.checkbox("Load sample (โค 5M rows)")
    if upl:
        df = read_file(upl, sample)

# Nothing loaded yet (or the source was empty): prompt and end this run.
if df.empty:
    st.info("โฌ ๏ธ Load a dataset to get started.")
    st.stop()

st.success("โ Data loaded")
st.dataframe(df.head(), use_container_width=True)
# โโโโโโโโโโโโโโโโโโโโโ 3โฏยทโฏSUMMARY + GEMINI โโโโโโโโโโโโโโโโโโโ | |
# Headline metrics: row count, column count, and share of missing cells.
rows, cols = df.shape
miss_pct = df.isna().sum().sum() / (rows * cols) * 100
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss_pct:.1f}")

st.subheader("๐ง Gemini Insights")
with st.spinner("Generating analysis..."):
    try:
        described = df.describe(include="all", datetime_is_numeric=True)
    except TypeError:
        # pandas 2.0 removed `datetime_is_numeric` (datetimes are always
        # summarised numerically there), so passing it raises TypeError.
        described = df.describe(include="all")
    # Only the rounded summary statistics are sent to the model.
    summary = described.round(2).to_json()
    st.markdown(ask_gemini(
        "You are a senior BI analyst. List 5 key insights and 3 action items based on this dataset: " + summary
    ))
# โโโโโโโโโโโโโโโโโโโโโ 4โฏยทโฏTIME SERIES SETUP โโโโโโโโโโโโโโโโโ | |
# Try datetime coercion on text columns only. Numeric columns are skipped on
# purpose: pd.to_datetime accepts integers/floats as epoch offsets, which
# would turn every metric column into timestamps and leave nothing to plot.
for c in df.columns:
    column = df[c]
    if not (pd.api.types.is_object_dtype(column) or pd.api.types.is_string_dtype(column)):
        continue
    try:
        df[c] = pd.to_datetime(column)
    except (ValueError, TypeError, OverflowError):
        # Not parseable as dates; keep the column unchanged.
        continue

date_options = [c for c in df.columns if pd.api.types.is_datetime64_any_dtype(df[c])]
metric_options = list(df.select_dtypes("number").columns)
if not date_options or not metric_options:
    # With an empty option list the selectbox yields None and the column
    # lookup below would fail, so end this run with an explanation instead.
    st.warning("Time-series analysis needs at least one date column and one numeric column.")
    st.stop()

DATE_COL = st.selectbox("Date column", date_options)
METRIC_COL = st.selectbox("Numeric metric", metric_options)

# One value per timestamp: mean of the metric, ordered chronologically.
series = (
    df[[DATE_COL, METRIC_COL]].dropna()
    .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
)
fig_ts = px.line(series, title=f"{METRIC_COL} Trend", labels={"index": "Date", METRIC_COL: METRIC_COL})
st.plotly_chart(fig_ts, use_container_width=True)
# โโโโโโโโโโโโโโโโโโโโโ 5โฏยทโฏARIMA FORECASTING โโโโโโโโโโโโโโโโโ | |
st.subheader("๐ฎ Forecast")
steps = st.slider("Forecast Horizon", 3, 365, 90)
p = st.number_input("AR Order (p)", 0, 5, 1)
d = st.number_input("Diff Order (d)", 0, 2, 1)
q = st.number_input("MA Order (q)", 0, 5, 1)

if len(series) < 3:
    # pd.infer_freq raises ValueError with fewer than three timestamps, so
    # report the problem instead of failing mid-page.
    st.warning("Need at least 3 dated observations to fit a forecast.")
else:
    with st.spinner("Training ARIMA model..."):
        model = ARIMA(series, order=(p, d, q)).fit()
    # Future timestamps continue from the last observation; fall back to a
    # daily step when no regular frequency can be inferred.
    freq = pd.infer_freq(series.index) or "D"
    fut_idx = pd.date_range(series.index[-1], periods=steps + 1, freq=freq)[1:]
    # Pass plain values: handing a labelled Series to pd.Series together with
    # a new index reindexes by label and produces all-NaN output whenever the
    # model's own forecast index differs from fut_idx.
    forecast = pd.Series(list(model.forecast(steps)), index=fut_idx)
    fig_fc = px.line(pd.concat([series, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
    st.plotly_chart(fig_fc, use_container_width=True)
# โโโโโโโโโโโโโโโโโโโโโ 6โฏยทโฏEDA TOOLS โโโโโโโโโโโโโโโโโโโโโโโโโ | |
st.subheader("๐ Exploratory Data Dashboard")

# Candidate columns for the histogram picker. The options must be a list of
# column names; passing the single METRIC_COL string does not offer the
# dataset's numeric columns for selection.
numeric_cols = list(df.select_dtypes("number").columns)

with st.expander("Histogram + Box"):
    if numeric_cols:
        # Preselect the metric chosen for the time series when it is numeric.
        default_idx = numeric_cols.index(METRIC_COL) if METRIC_COL in numeric_cols else 0
        col = st.selectbox("Metric column", numeric_cols, index=default_idx, key="hist")
        st.plotly_chart(px.histogram(df, x=col, marginal="box", template="plotly_dark"), use_container_width=True)
    else:
        st.info("No numeric columns available for a histogram.")

with st.expander("Correlation Heatmap"):
    # Pairwise correlation across numeric columns only.
    corr = df.select_dtypes("number").corr()
    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation Matrix"), use_container_width=True)
# โโโโโโโโโโโโโโโโโโโโโ 7โฏยทโฏSTRATEGY DOWNLOAD โโโโโโโโโโโโโโโโ | |
# Static Markdown brief offered as a download: a heading followed by one
# bullet per line, with no trailing newline.
brief = "\n".join(
    [
        "# Strategy Brief",
        "* Clean missing date values for better time modeling.",
        "* Investigate top correlations for potential drivers.",
        "* Leverage forecast for inventory and staff planning.",
        "* Watch for outliers >3ฯ weekly.",
        "* Segment by region and product for precise actions.",
    ]
)
st.download_button(
    "โฌ๏ธ Download Strategy (.md)",
    brief,
    "bizintel_brief.md",
    "text/markdown",
)