Spaces:

mgbam
/

BizIntel_AI

Sleeping

App Files Community

mgbam committed on May 7

Commit

b5d6aaa

verified ·

1 Parent(s): ca74caa

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -70

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
-"""app.py — BizIntel AI Ultra (Gemini‑only, v3)
 A production‑grade BI copilot with:
- • CSV / Excel / Parquet and live SQL ingestion
- • Memory‑safe chunk loading (≥2 GB) & dtype auto‑fix
- • Instant schema audit + Gemini‑generated insights
- • Drill‑down EDA (histogram, violin, scatter‑matrix, heat‑map)
- • Auto‑detected datetime + user‑tunable ARIMA forecasting
- • One‑click strategy brief (Markdown)
 """
 from __future__ import annotations
 import os, io, tempfile
 from pathlib import Path
@@ -19,23 +20,22 @@ from statsmodels.tsa.arima.model import ARIMA
 from sqlalchemy import create_engine
 import google.generativeai as genai
-# ─────────────────── 0 · CONFIG & SECRETS ────────────────────
 API_KEY = st.secrets.get("GEMINI_APIKEY") or os.getenv("GEMINI_APIKEY")
 if not API_KEY:
-    st.error("❌ `GEMINI_APIKEY` missing — add it in *Settings → Secrets* or env vars.")
     st.stop()
 genai.configure(api_key=API_KEY)
 GEM_MODEL = "gemini-1.5-pro-latest"
 TMP = Path(tempfile.gettempdir())
-st.set_page_config("BizIntel AI Ultra", "📊", "wide", initial_sidebar_state="expanded")
-# ─────────────────── 1 · UTILITY HELPERS ─────────────────────
 @st.cache_data(show_spinner=False)
 def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
     suf = Path(buf.name).suffix.lower()
-    if suf in {".xls", ".xlsx"}:  # Excel
         return pd.read_excel(buf, engine="openpyxl")
     if suf == ".parquet":
         return pd.read_parquet(buf)
@@ -53,92 +53,94 @@ def read_table(uri: str, tbl: str) -> pd.DataFrame:
 def ask_gemini(prompt: str) -> str:
     return genai.GenerativeModel(GEM_MODEL).generate_content(prompt).text.strip()
-# ─────────────────── 2 · DATA INGESTION ──────────────────────
-st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")
-mode = st.sidebar.radio("Source", ["File", "SQL"], horizontal=True)
-DF: pd.DataFrame = pd.DataFrame()
-if mode == "File":
-    upl = st.sidebar.file_uploader("Upload CSV / Excel / Parquet", ["csv","xls","xlsx","parquet"], help="≤2 GB")
-    sample = st.sidebar.checkbox("Load sample only (≤ 5 M rows)")
     if upl:
-        DF = read_file(upl, sample)
 else:
     uri = st.sidebar.text_input("SQLAlchemy URI")
     if uri:
-        tbl = st.sidebar.selectbox("Table", sql_tables(uri))
         if tbl:
-            DF = read_table(uri, tbl)
-if DF.empty:
-    st.info("⬅️ Load data to start.")
     st.stop()
-st.success("✅ Data loaded")
-st.dataframe(DF.head(), use_container_width=True)
-# ─────────────────── 3 · QUICK STATS + GEMINI INSIGHT ────────
-rows, cols = DF.shape
-miss = DF.isna().sum().sum() / (rows*cols) * 100
-c1,c2,c3 = st.columns(3)
 c1.metric("Rows", f"{rows:,}")
 c2.metric("Columns", cols)
-c3.metric("Missing %", f"{miss:.1f}")
 st.subheader("🧠 Gemini Insights")
-with st.spinner("Gemini analysing…"):
-    summary = DF.describe(include="all", datetime_is_numeric=True).round(2).to_json()
     st.markdown(ask_gemini(
-        "You are a senior BI analyst. Give 5 concise insights and 3 action items for the dataset: " + summary
     ))
-# ─────────────────── 4 · TIME‑SERIES SELECTION ───────────────
-# attempt datetime coercion
-for c in DF.columns:
-    if not pd.api.types.is_datetime64_any_dtype(DF[c]):
         try:
-            DF[c] = pd.to_datetime(DF[c])
-        except:  # noqa: E722
-            pass
-DATE_COL = st.selectbox("Date column", [c for c in DF.columns if pd.api.types.is_datetime64_any_dtype(DF[c])])
-METRIC_COL = st.selectbox("Numeric metric", [c for c in DF.select_dtypes("number").columns])
-ts = (
-    DF[[DATE_COL, METRIC_COL]].dropna()
       .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
 )
-fig_ts = px.line(ts, title=f"{METRIC_COL} Trend", labels={"index":"Date", METRIC_COL:METRIC_COL})
 st.plotly_chart(fig_ts, use_container_width=True)
-# ─────────────────── 5 · FORECASTING ─────────────────────────
 st.subheader("🔮 Forecast")
-steps = st.slider("Horizon", 3, 365, 90)
-p = st.number_input("p", 0,5,1); d = st.number_input("d",0,2,1); q = st.number_input("q",0,5,1)
-with st.spinner("Fitting ARIMA…"):
-    model = ARIMA(ts, order=(p,d,q)).fit()
-    fut_idx = pd.date_range(ts.index[-1], periods=steps+1, freq=pd.infer_freq(ts.index) or "D")[1:]
     forecast = pd.Series(model.forecast(steps), index=fut_idx)
-fig_fc = px.line(pd.concat([ts, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
 st.plotly_chart(fig_fc, use_container_width=True)
-# ─────────────────── 6 · EDA EXPANDERS ───────────────────────
-st.subheader("🔍 EDA Dashboard")
-with st.expander("Histogram / Box"):
-    col = st.selectbox("Column", METRIC_COL, key="hist")
-    st.plotly_chart(px.histogram(DF, x=col, marginal="box", template="plotly_dark"), use_container_width=True)
-with st.expander("Correlation heat‑map"):
-    corr = DF.select_dtypes("number").corr()
-    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation"), use_container_width=True)
-# ─────────────────── 7 · STRATEGY BRIEF DOWNLOAD ────────────
 brief = (
     "# Strategy Brief\n"
-    "* Clean missing timestamps.\n"
-    "* Investigate strongest correlations for causal drivers.\n"
-    "* Use forecast to guide inventory & staffing planning.\n"
-    "* Review outliers weekly (>3σ).\n"
-    "* Segment analysis by region & product for micro‑actions."
 )
-st.download_button("⬇️ Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")

+"""app.py — BizIntel AI Ultra (Gemini‑only, v4)
 A production‑grade BI copilot with:
+ • CSV / Excel / Parquet and SQL ingestion
+ • Smart sampling + memory‑safe loading for large files
+ • Schema + missing-data audit with Gemini-generated insights
+ • Drill-down EDA (histogram, violin, scatter-matrix, heatmap)
+ • Auto‑detected date column, tunable ARIMA forecasting
+ • One-click strategy brief download (Markdown)
 """
 from __future__ import annotations
 import os, io, tempfile
 from pathlib import Path
 from sqlalchemy import create_engine
 import google.generativeai as genai
+# ───────────────────── 0 · CONFIGURATION ─────────────────────
 API_KEY = st.secrets.get("GEMINI_APIKEY") or os.getenv("GEMINI_APIKEY")
 if not API_KEY:
+    st.error("❌ Missing `GEMINI_APIKEY` — add it in Settings → Secrets or set env variable.")
     st.stop()
+st.set_page_config("BizIntel AI Ultra", "📊", "wide", initial_sidebar_state="expanded")
 genai.configure(api_key=API_KEY)
 GEM_MODEL = "gemini-1.5-pro-latest"
 TMP = Path(tempfile.gettempdir())
+# ───────────────────── 1 · UTILITY HELPERS ───────────────────
 @st.cache_data(show_spinner=False)
 def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
     suf = Path(buf.name).suffix.lower()
+    if suf in {".xls", ".xlsx"}:
         return pd.read_excel(buf, engine="openpyxl")
     if suf == ".parquet":
         return pd.read_parquet(buf)
 def ask_gemini(prompt: str) -> str:
     return genai.GenerativeModel(GEM_MODEL).generate_content(prompt).text.strip()
+# ───────────────────── 2 · DATA INGESTION ────────────────────
+st.title("📊 BizIntel AI Ultra — Gemini 1.5 Pro BI Copilot")
+mode = st.sidebar.radio("Select Data Source", ["Upload File", "SQL Database"], horizontal=True)
+df: pd.DataFrame = pd.DataFrame()
+if mode == "Upload File":
+    upl = st.sidebar.file_uploader("Upload CSV / Excel / Parquet", ["csv", "xls", "xlsx", "parquet"], help="≤2 GB")
+    sample = st.sidebar.checkbox("Load sample (≤ 5M rows)")
     if upl:
+        df = read_file(upl, sample)
 else:
     uri = st.sidebar.text_input("SQLAlchemy URI")
     if uri:
+        tbl = st.sidebar.selectbox("Choose Table", sql_tables(uri))
         if tbl:
+            df = read_table(uri, tbl)
+if df.empty:
+    st.info("⬅️ Load a dataset to get started.")
     st.stop()
+st.success("✅ Data loaded")
+st.dataframe(df.head(), use_container_width=True)
+# ───────────────────── 3 · SUMMARY + GEMINI ───────────────────
+rows, cols = df.shape
+miss_pct = df.isna().sum().sum() / (rows * cols) * 100
+c1, c2, c3 = st.columns(3)
 c1.metric("Rows", f"{rows:,}")
 c2.metric("Columns", cols)
+c3.metric("Missing %", f"{miss_pct:.1f}")
 st.subheader("🧠 Gemini Insights")
+with st.spinner("Generating analysis..."):
+    summary = df.describe(include="all", datetime_is_numeric=True).round(2).to_json()
     st.markdown(ask_gemini(
+        "You are a senior BI analyst. List 5 key insights and 3 action items based on this dataset: " + summary
     ))
+# ───────────────────── 4 · TIME SERIES SETUP ─────────────────
+# try datetime coercion
+for c in df.columns:
+    if not pd.api.types.is_datetime64_any_dtype(df[c]):
         try:
+            df[c] = pd.to_datetime(df[c])
+        except: pass
+DATE_COL = st.selectbox("Date column", [c for c in df.columns if pd.api.types.is_datetime64_any_dtype(df[c])])
+METRIC_COL = st.selectbox("Numeric metric", [c for c in df.select_dtypes("number").columns])
+series = (
+    df[[DATE_COL, METRIC_COL]].dropna()
       .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
 )
+fig_ts = px.line(series, title=f"{METRIC_COL} Trend", labels={"index": "Date", METRIC_COL: METRIC_COL})
 st.plotly_chart(fig_ts, use_container_width=True)
+# ───────────────────── 5 · ARIMA FORECASTING ─────────────────
 st.subheader("🔮 Forecast")
+steps = st.slider("Forecast Horizon", 3, 365, 90)
+p = st.number_input("AR Order (p)", 0, 5, 1)
+d = st.number_input("Diff Order (d)", 0, 2, 1)
+q = st.number_input("MA Order (q)", 0, 5, 1)
+with st.spinner("Training ARIMA model..."):
+    model = ARIMA(series, order=(p, d, q)).fit()
+    fut_idx = pd.date_range(series.index[-1], periods=steps + 1, freq=pd.infer_freq(series.index) or "D")[1:]
     forecast = pd.Series(model.forecast(steps), index=fut_idx)
+fig_fc = px.line(pd.concat([series, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
 st.plotly_chart(fig_fc, use_container_width=True)
+# ───────────────────── 6 · EDA TOOLS ─────────────────────────
+st.subheader("🔍 Exploratory Data Dashboard")
+with st.expander("Histogram + Box"):
+    col = st.selectbox("Metric column", METRIC_COL, key="hist")
+    st.plotly_chart(px.histogram(df, x=col, marginal="box", template="plotly_dark"), use_container_width=True)
+with st.expander("Correlation Heatmap"):
+    corr = df.select_dtypes("number").corr()
+    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation Matrix"), use_container_width=True)
+# ───────────────────── 7 · STRATEGY DOWNLOAD ────────────────
 brief = (
     "# Strategy Brief\n"
+    "* Clean missing date values for better time modeling.\n"
+    "* Investigate top correlations for potential drivers.\n"
+    "* Leverage forecast for inventory and staff planning.\n"
+    "* Watch for outliers >3σ weekly.\n"
+    "* Segment by region and product for precise actions."
 )
+st.download_button("⬇️ Download Strategy (.md)", brief, "bizintel_brief.md", "text/markdown")