# Spaces: mgbam / BizIntel_AI — Sleeping
# File size: 7,642 Bytes

from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import plotly.express as px
import streamlit as st
from sqlalchemy import create_engine
from statsmodels.tsa.arima.model import ARIMA


# ── CONFIG ───────────────────────────────────────────────────────────────────────
# Page-level Streamlit settings, collected in one mapping and applied in a
# single call.
_PAGE_CONFIG = {
    "page_title": "BizIntel AI Ultra",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)

# The OpenAI key is read from Streamlit Secrets; OPENAI_API_KEY must be set
# there, otherwise this lookup fails at startup.
openai.api_key = st.secrets["OPENAI_API_KEY"]

# ── CACHEABLE HELPERS ────────────────────────────────────────────────────────────
@st.cache_data
def load_uploaded_file(uploaded):
    """Load an uploaded CSV or Excel file into a DataFrame.

    The parser is chosen from the file name's extension:

    * ``.xlsx`` is read with the ``openpyxl`` engine.
    * ``.xls`` is read with whatever engine pandas selects itself, because
      ``openpyxl`` cannot read the legacy binary format.
    * anything else is parsed as CSV.

    Args:
        uploaded: File-like object exposing a ``name`` attribute (the caller
            passes the value returned by ``st.file_uploader``).

    Returns:
        The parsed DataFrame, or an empty DataFrame when parsing fails. The
        failure is reported to the user with ``st.error`` rather than raised.
    """
    try:
        filename = uploaded.name.lower()
        if filename.endswith(".xlsx"):
            return pd.read_excel(uploaded, engine="openpyxl")
        if filename.endswith(".xls"):
            # Leave engine unset so pandas picks a reader that understands
            # the old .xls format instead of forcing openpyxl onto it.
            return pd.read_excel(uploaded)
        return pd.read_csv(uploaded)
    except Exception as e:
        # UI boundary: show the problem and hand back an empty frame, which
        # the caller treats as "no data loaded".
        st.error(f"⚠️ File parsing failed: {e}")
        return pd.DataFrame()

@st.cache_data
def list_db_tables(conn_str):
    """Return the table names available through ``conn_str``.

    Args:
        conn_str: SQLAlchemy connection URL.

    Returns:
        List of table names reported by the database inspector.

    Raises:
        Whatever SQLAlchemy or the database driver raises when the URL is
        invalid or the connection fails; nothing is caught here.
    """
    # Local import keeps this function self-contained; sqlalchemy is already
    # a dependency of this file.
    from sqlalchemy import inspect

    engine = create_engine(conn_str)
    try:
        # Engine.table_names() was deprecated in SQLAlchemy 1.4 and removed
        # in 2.0; the inspector API works on both.
        return inspect(engine).get_table_names()
    finally:
        # Release pooled connections: the engine is created per call and is
        # not reused afterwards.
        engine.dispose()

@st.cache_data
def fetch_db_table(conn_str, table):
    """Read an entire database table into a DataFrame.

    Args:
        conn_str: SQLAlchemy connection URL.
        table: Name of the table to read.

    Returns:
        DataFrame produced by ``pd.read_sql_table`` for ``table``.

    Raises:
        Whatever SQLAlchemy, the driver, or pandas raises on connection or
        read failure; nothing is caught here.
    """
    engine = create_engine(conn_str)
    try:
        return pd.read_sql_table(table, engine)
    finally:
        # The result is fully materialised, so the per-call engine and its
        # connection pool can be released immediately.
        engine.dispose()

# ── DATA NARRATIVE VIA OPENAI ───────────────────────────────────────────────────
@st.cache_data(show_spinner=False)
def generate_data_narrative(df: pd.DataFrame) -> str:
    """Send a statistical summary of ``df`` to OpenAI and return its narrative.

    The result is cached by Streamlit so that widget-triggered reruns of the
    script do not issue a new (billable) API request for the same data.

    Args:
        df: Dataset to summarise.

    Returns:
        The model's reply with surrounding whitespace stripped; an empty
        string if the reply carries no content.

    Raises:
        Any exception raised by the OpenAI client (network, auth, quota, ...)
        propagates to the caller.
    """
    # Round *before* transposing: after the transpose of a mixed-type summary
    # every column is object dtype and DataFrame.round() leaves it untouched.
    # Serialise with to_json so the prompt really contains JSON (nulls and
    # ISO dates) rather than a Python dict repr.
    summary = (
        df.describe(include="all")
        .round(2)
        .transpose()
        .to_json(date_format="iso", default_handler=str)
    )
    prompt = (
        "You are a world-class data analyst. "
        "Below is a JSON summary of a dataset. "
        "Write a concise, professional narrative highlighting the top 5 business-critical insights, "
        "in bullet format:\n\n"
        f"{summary}\n\n"
    )
    request = {
        "model": "gpt-4o-mini",  # or "gpt-4o", "gpt-4o-mini-high"
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.3,
    }
    # NOTE(review): openai>=1.0 removed ChatCompletion in favour of the
    # module-level `openai.chat` client; support whichever is installed.
    # Confirm against the pinned openai version.
    if hasattr(openai, "chat"):
        resp = openai.chat.completions.create(**request)
    else:
        resp = openai.ChatCompletion.create(**request)
    content = resp.choices[0].message.content
    return (content or "").strip()

# ── APP ─────────────────────────────────────────────────────────────────────────
# NOTE(review): the title advertises Gemini while the narrative helper calls
# OpenAI — confirm the intended branding.
st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")

# 1) Choose data source
source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])

# Start from an empty frame; the analysis section only runs once data exists.
df = pd.DataFrame()
if source == "Upload CSV / Excel":
    uploaded = st.file_uploader(
        "Drag & drop file here (≤500 MB) • .csv, .xls, .xlsx",
        type=["csv","xls","xlsx"]
    )
    if uploaded:
        with st.spinner("Loading file…"):
            df = load_uploaded_file(uploaded)

else:
    # The selected dialect only tailors the placeholder hint; the connection
    # itself is built entirely from the string the user types.
    dialect = st.selectbox("DB engine", ["postgresql","mysql","mssql+pyodbc","oracle+cx_oracle"])
    conn_str = st.text_input("Connection string", placeholder=f"{dialect}://user:pass@host/db")
    if conn_str:
        # A bad URL, missing driver or unreachable host should show a message,
        # not a traceback.
        try:
            tables = list_db_tables(conn_str)
        except Exception as e:
            st.error(f"⚠️ Could not list database tables: {e}")
        else:
            table = st.selectbox("Choose table", tables)
            if table:
                with st.spinner(f"Fetching `{table}`…"):
                    try:
                        df = fetch_db_table(conn_str, table)
                    except Exception as e:
                        st.error(f"⚠️ Could not read table `{table}`: {e}")

# 2) If we have data…
if not df.empty:
    st.success("✅ Data loaded!")
    st.markdown("---")

    # 2a) Preview & summary metrics
    st.subheader("🗂 Data Preview & Overview")
    st.dataframe(df.head(5), use_container_width=True)

    r, c = df.shape
    # df is non-empty in this branch, so r*c is never zero.
    missing_pct = (df.isna().sum().sum() / (r*c) * 100).round(1)
    col1, col2, col3 = st.columns(3)
    col1.metric("Rows", f"{r:,}")
    col2.metric("Cols", f"{c:,}")
    col3.metric("Missing %", f"{missing_pct}%")
    st.markdown("---")

    # 2b) Automated data narrative
    st.subheader("📝 Data Narrative")
    try:
        with st.spinner("Generating insights…"):
            narrative = generate_data_narrative(df)
    except Exception as e:
        # A failed API call must not take the rest of the dashboard down.
        st.error(f"Narrative generation failed: {e}")
    else:
        st.markdown(narrative)

    # 2c) Optional EDA visuals
    st.subheader("🔎 Exploratory Visuals")
    num_cols = df.select_dtypes("number").columns.tolist()
    if not num_cols:
        # Every chart below needs at least one numeric column.
        st.info("No numeric columns found – charts are unavailable for this dataset.")

    if num_cols and st.checkbox("Show histogram"):
        col = st.selectbox("Histogram column", num_cols, key="hist")
        fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
        st.plotly_chart(fig, use_container_width=True)

    if num_cols and st.checkbox("Show scatter matrix"):
        # Cap at six dimensions to keep the matrix readable.
        dims = num_cols[:6]
        fig = px.scatter_matrix(df[dims], dimensions=dims, title="Scatter Matrix")
        st.plotly_chart(fig, use_container_width=True)

    if num_cols and st.checkbox("Show correlation heatmap"):
        corr = df[num_cols].corr()
        fig, ax = plt.subplots(figsize=(6,5))
        im = ax.imshow(corr, cmap="RdBu", vmin=-1, vmax=1)
        # Use the Axes/Figure API rather than pyplot's implicit global state.
        ax.set_xticks(range(len(corr)))
        ax.set_xticklabels(corr.columns, rotation=45, ha="right")
        ax.set_yticks(range(len(corr)))
        ax.set_yticklabels(corr.columns)
        fig.colorbar(im, ax=ax)
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so pyplot does not accumulate one per rerun.
        plt.close(fig)

    # 3) Trend & forecast
    st.markdown("---")
    st.subheader("📈 Time-Series Trend & 90-Day Forecast")

    # pick columns: anything already datetime, plus object columns that may
    # hold date strings
    dt_opts = [
        col for col in df.columns
        if pd.api.types.is_datetime64_any_dtype(df[col]) or df[col].dtype == "object"
    ]
    if not dt_opts or not num_cols:
        # Without both selections the selectboxes would return None and the
        # column lookups below would fail.
        st.warning("Forecasting needs at least one date-like column and one numeric column.")
    else:
        date_col = st.selectbox("Date column", dt_opts)
        # Unparseable values become NaT and are dropped with the NaNs below.
        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
        metric_col = st.selectbox("Metric column", num_cols)

        ts = (
            df[[date_col, metric_col]]
            .dropna()
            .set_index(date_col)
            .sort_index()
        )
        # De-duplicate on the series' own date index. Using df.index here
        # would build a mask of the wrong length after dropna() and would
        # never remove duplicate dates.
        ts = ts.loc[~ts.index.duplicated(keep="first")]

        # plot trend
        fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time", labels={"index":"Date"})
        st.plotly_chart(fig_trend, use_container_width=True)

        # forecast
        with st.spinner("Running ARIMA…"):
            try:
                model = ARIMA(ts[metric_col], order=(1,1,1)).fit()
                # 91 daily stamps starting at the last observation; drop the
                # first so the forecast begins the day after it.
                future_idx = pd.date_range(start=ts.index.max(), periods=91, freq="D")[1:]
                pred = model.get_forecast(90).predicted_mean
                # Re-index by position onto the daily future dates.
                df_pred = pd.Series(pred.values, index=future_idx, name="Forecast")

                combo = pd.concat([ts[metric_col], df_pred], axis=1)
                fig_fc = px.line(
                    combo,
                    labels={metric_col:metric_col, "Forecast":"Forecast"},
                    title=f"{metric_col} & 90-Day Forecast"
                )
                st.plotly_chart(fig_fc, use_container_width=True)

            except Exception as e:
                # Too few points, an empty series, or a fit failure: report it
                # and continue to the strategy brief.
                st.error(f"Forecast failed: {e}")

    # 4) Strategy download
    st.markdown("---")
    st.subheader("🚀 Actionable Strategy Brief")
    # Static brief; the trailing double spaces are Markdown line breaks.
    strategy_md = """
# BizIntel AI Ultra – Strategy Brief

**1. Data Quality First**  
Ensure all dates are parsed correctly—critical for any time-series modeling.

**2. Trend & Seasonality**  
Investigate the underlying patterns and adjust your operations calendar.

**3. Outlier Management**  
Flag and validate extreme observations to avoid skewed forecasts.

**4. Segment-Level Insights**  
Drill into regions or product lines for targeted interventions.

**5. Predict & Act**  
Leverage your 90-day projections for inventory, staffing, and marketing plans.
    """.strip()

    st.download_button(
        "📥 Download Strategy (.md)",
        data=strategy_md,
        file_name="bizintel_strategy.md",
        mime="text/markdown"
    )