# app.py - BizIntel AI Ultra v2
# =============================================================
# CSV / Excel / DB ingestion • Trend + ARIMA forecast (90 d or 3 steps)
# Confidence bands • Model explainability • Gemini 1.5 Pro strategy
# Safe Plotly writes -> /tmp • KPI cards • Optional EDA visuals
# =============================================================
import os, tempfile, warnings
from typing import List
import numpy as np
import pandas as pd
import streamlit as st
import plotly.graph_objects as go
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tools.sm_exceptions import ConvergenceWarning
import google.generativeai as genai
import matplotlib.pyplot as plt
# ──────────────────────────────────────────────────────────────
# 0) Plotly safe write → /tmp
# ──────────────────────────────────────────────────────────────
TMP = tempfile.gettempdir()
orig_write = go.Figure.write_image
go.Figure.write_image = lambda self, p, *a, **k: orig_write(
    self, os.path.join(TMP, os.path.basename(p)), *a, **k
)
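# Rationale (added note): hosted environments such as Hugging Face Spaces usually mount
# the app directory read-only, so every write_image() call is transparently redirected
# to the writable temp directory regardless of the path the caller supplied.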
# ──────────────────────────────────────────────────────────────
# 1) Local helpers & DB connector
# ──────────────────────────────────────────────────────────────
from tools.csv_parser import parse_csv_tool
from tools.plot_generator import plot_metric_tool
from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
# ──────────────────────────────────────────────────────────────
# 2) Gemini 1.5 Pro
# ──────────────────────────────────────────────────────────────
genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config=dict(temperature=0.7, top_p=0.9, response_mime_type="text/plain"),
)
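# Assumes the GEMINI_APIKEY environment variable (or Space secret) is set; with a missing
# key, genai.configure() typically does not complain up front and the failure only
# surfaces on the first generate_content() call further below.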
# ──────────────────────────────────────────────────────────────
# 3) Streamlit setup
# ──────────────────────────────────────────────────────────────
st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
st.title("BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
# ──────────────────────────────────────────────────────────────
# 4) Data source
# ──────────────────────────────────────────────────────────────
choice = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
csv_path: str | None = None
if choice.startswith("Upload"):
    up = st.file_uploader("CSV or Excel (≤ 500 MB)", type=["csv", "xlsx", "xls"])
    if up:
        tmp = os.path.join(TMP, up.name)
        with open(tmp, "wb") as f:
            f.write(up.read())
        if up.name.lower().endswith(".csv"):
            csv_path = tmp
        else:
            try:
                # Convert the first Excel sheet to CSV so downstream tools only deal with CSV
                pd.read_excel(tmp, sheet_name=0).to_csv(tmp + ".csv", index=False)
                csv_path = tmp + ".csv"
            except Exception as e:
                st.error(f"Excel parse failed: {e}")
else:
    eng = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn = st.text_input("SQLAlchemy connection string")
    if conn:
        try:
            tbl = st.selectbox("Table", list_tables(conn))
            if st.button("Fetch table"):
                csv_path = fetch_data_from_db(conn, tbl)
                st.success(f"Fetched **{tbl}**")
        except Exception as e:
            st.error(f"DB error: {e}")
if not csv_path:
    st.stop()
with open(csv_path, "rb") as f:
    st.download_button("Download working CSV", f, file_name=os.path.basename(csv_path))
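# st.stop() above halts the script until a data source has been provided; every source
# (CSV upload, Excel sheet, or DB table) has been normalised to a CSV on disk at
# csv_path, which is what the download button exposes.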
# ──────────────────────────────────────────────────────────────
# 5) Column selectors
# ──────────────────────────────────────────────────────────────
df_head = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_head)
date_col = st.selectbox("Date/time column", df_head.columns)
numeric_cols = df_head.select_dtypes("number").columns.tolist()
metric_options = [c for c in numeric_cols if c != date_col]
if not metric_options:
    st.error("No numeric columns available apart from the date column.")
    st.stop()
metric_col = st.selectbox("Numeric metric column", metric_options)
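# Note: dtype detection above is based on the 5-row preview only (nrows=5), so a numeric
# column whose first few rows are blank or textual may not be offered as a metric.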
# ──────────────────────────────────────────────────────────────
# 6) Summary & trend chart
# ──────────────────────────────────────────────────────────────
summary = parse_csv_tool(csv_path)
trend_fig = plot_metric_tool(csv_path, date_col, metric_col)
if isinstance(trend_fig, go.Figure):
    st.subheader("Trend")
    st.plotly_chart(trend_fig, use_container_width=True)
else:
    st.warning(trend_fig)  # not a figure: presumably a message returned by plot_metric_tool
# ──────────────────────────────────────────────────────────────
# 7) Robust ARIMA + explainability
# ──────────────────────────────────────────────────────────────
def build_series(path, dcol, vcol):
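    """Load the date and metric columns, coerce types, average duplicate timestamps,
    and return an evenly spaced, interpolated series plus its inferred frequency
    (falling back to daily when the frequency cannot be inferred)."""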
    df = pd.read_csv(path, usecols=[dcol, vcol])
    df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
    df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
    df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
    if df.empty or df[dcol].nunique() < 2:
        raise ValueError("Need ≥ 2 valid timestamps.")
    s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
    freq = pd.infer_freq(s.index) or "D"
    s = s.asfreq(freq).interpolate()
    return s, freq
@st.cache_data(show_spinner="Fitting ARIMA…")
def fit_arima(series):
    warnings.simplefilter("ignore", ConvergenceWarning)
    model = ARIMA(series, order=(1, 1, 1))
    return model.fit()
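# Note (added): the ARIMA order is fixed at (1,1,1) and is not tuned per dataset. A
# minimal sketch of an AIC-based order search (hypothetical, not used by this app):
#     best = min((ARIMA(series, order=o).fit() for o in [(1, 1, 1), (2, 1, 1), (1, 1, 2)]),
#                key=lambda r: r.aic)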
try:
    series, freq = build_series(csv_path, date_col, metric_col)
    horizon = 90 if freq == "D" else 3  # 90 daily steps, otherwise 3 steps of the inferred frequency
    res = fit_arima(series)
    fc = res.get_forecast(steps=horizon)
    forecast = fc.predicted_mean
    ci = fc.conf_int()
except Exception as e:
    st.subheader(f"{metric_col} Forecast")
    st.warning(f"Forecast failed: {e}")
    series = forecast = ci = None
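# On success, forecast is a Series of point predictions and ci a two-column frame
# (lower and upper bounds of the default 95 % interval); both are indexed by future
# timestamps and feed the shaded band below.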
if forecast is not None:
    # History, point forecast, and confidence band: the upper bound is drawn as an
    # invisible line, then the lower bound fills up to it with fill="tonexty".
    fig = go.Figure()
    fig.add_scatter(x=series.index, y=series, mode="lines", name=metric_col)
    fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
    fig.add_scatter(x=ci.index, y=ci.iloc[:, 1], mode="lines",
                    line=dict(width=0), showlegend=False)
    fig.add_scatter(x=ci.index, y=ci.iloc[:, 0], mode="lines",
                    line=dict(width=0), fill="tonexty",
                    fillcolor="rgba(255,0,0,0.25)", showlegend=False)
    fig.update_layout(title=f"{metric_col} Forecast ({horizon} steps)",
                      template="plotly_dark", xaxis_title=date_col,
                      yaxis_title=metric_col)
    st.subheader(f"{metric_col} Forecast")
    st.plotly_chart(fig, use_container_width=True)
    # ---------------- summary & interpretation ----------------
    st.subheader("Model Summary")
    st.code(res.summary().as_text(), language="text")
    st.subheader("Coefficient Interpretation")
    ar = res.arparams
    ma = res.maparams
    interp: List[str] = []
    if ar.size:
        interp.append(f"• AR(1) = {ar[0]:.2f} → "
                      f"{'strong' if abs(ar[0]) > 0.5 else 'moderate'} "
                      "persistence in the series.")
    if ma.size:
        interp.append(f"• MA(1) = {ma[0]:.2f} → "
                      f"{'large' if abs(ma[0]) > 0.5 else 'modest'} "
                      "shock adjustment.")
    st.markdown("\n\n".join(interp) or "N/A")
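    # The interpretation above only covers the first AR and MA terms, which is all the
    # fixed (1,1,1) specification estimates.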
    # ---------------- Residual ACF ----------------
    st.subheader("Residual Autocorrelation (ACF)")
    fig_acf, ax_acf = plt.subplots(figsize=(6, 3))
    plot_acf(res.resid.dropna(), lags=30, alpha=0.05, ax=ax_acf)
    acf_png = os.path.join(TMP, "acf.png")
    fig_acf.tight_layout()
    fig_acf.savefig(acf_png, dpi=120)
    plt.close(fig_acf)
    st.image(acf_png, use_container_width=True)
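    # Reading the plot: bars outside the shaded 95 % band indicate autocorrelation left
    # in the residuals, i.e. structure the (1,1,1) model has not captured.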
    # ---------------- Back-test ----------------
    k = max(int(len(series) * 0.2), 10)
    train, test = series[:-k], series[-k:]
    bt_res = ARIMA(train, order=(1, 1, 1)).fit()
    bt_pred = bt_res.forecast(k)
    mape = (abs(bt_pred - test) / test).mean() * 100
    rmse = np.sqrt(((bt_pred - test) ** 2).mean())
    st.subheader("Back-test (last 20 %)")
    colA, colB = st.columns(2)
    colA.metric("MAPE", f"{mape:.2f} %")
    colB.metric("RMSE", f"{rmse:,.0f}")
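    # MAPE is scale-free (percentage error) while RMSE is in the metric's own units;
    # note that MAPE becomes unstable when the held-out actuals contain values near zero.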
    # ---------------- Optional seasonal decomposition -------
    with st.expander("Seasonal Decomposition"):
        try:
            period = {"D": 7, "H": 24, "M": 12}.get(freq, None)
            if period:
                dec = seasonal_decompose(series, period=period, model="additive")
                for comp in ["trend", "seasonal", "resid"]:
                    st.line_chart(getattr(dec, comp), height=150)
            else:
                st.info("Frequency not suited for decomposition.")
        except Exception as e:
            st.info(f"Decomposition failed: {e}")
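    # The period lookup above only maps daily data to a weekly cycle (7), hourly to a
    # daily cycle (24) and monthly to a yearly cycle (12); any other inferred frequency
    # falls through to the info message rather than guessing a period.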
# ──────────────────────────────────────────────────────────────
# 8) Gemini strategy report
# ──────────────────────────────────────────────────────────────
prompt = (
    "You are **BizIntel Strategist AI**.\n\n"
    f"### Dataset Summary\n```\n{summary}\n```\n\n"
    f"### {metric_col} Forecast\n```\n"
    f"{forecast.to_string() if forecast is not None else 'N/A'}\n```\n\n"
    "Craft a Markdown report:\n"
    "1. Five insights\n2. Three actionable strategies\n"
    "3. Risks / anomalies\n4. Extra visuals to consider."
)
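# The prompt embeds the dataset summary and the raw forecast values as fenced text;
# gemini.generate_content() is a blocking call, hence the spinner around it below.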
with st.spinner("Gemini generating strategy…"):
    md = gemini.generate_content(prompt).text
st.subheader("Strategy Recommendations (Gemini 1.5 Pro)")
st.markdown(md)
st.download_button("Download Strategy (.md)", md, file_name="strategy.md")
# ──────────────────────────────────────────────────────────────
# 9) KPI cards + detailed stats + optional EDA (unchanged)
# ──────────────────────────────────────────────────────────────
fulldf = pd.read_csv(csv_path, low_memory=False)
rows, cols = fulldf.shape
miss_pct = fulldf.isna().mean().mean()*100
st.markdown("---")
st.subheader("Dataset Overview")
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss_pct:.1f}%")
with st.expander("Descriptive Statistics"):
    st.dataframe(fulldf.describe().T.style.format(precision=2).background_gradient("Blues"),
                 use_container_width=True)
st.markdown("---")
st.subheader("Optional Exploratory Visuals")
num_cols = fulldf.select_dtypes("number").columns.tolist()
if st.checkbox("Histogram"):
    st.plotly_chart(histogram_tool(csv_path, st.selectbox("Var", num_cols, key="hist")),
                    use_container_width=True)
if st.checkbox("Scatter Matrix"):
    sel = st.multiselect("Columns", num_cols, default=num_cols[:3])
    if sel:
        st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)
if st.checkbox("Correlation Heat-map"):
    st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)