"""Streamlit dashboard for quick tabular analytics.

Loads a table from an uploaded CSV/Excel file or from a SQL database, shows a
preview with summary statistics and optional exploratory charts, plots a
trend line with a 90-day ARIMA forecast, and offers a set of strategy
recommendations as a Markdown download.
"""

import tempfile
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st
from sqlalchemy import create_engine
from sqlalchemy import inspect as sa_inspect
from sqlalchemy.exc import SQLAlchemyError
from statsmodels.tsa.arima.model import ARIMA

# Number of daily steps projected by the forecast section.
FORECAST_DAYS = 90

# Single source of truth for the recommendations: rendered on the page and
# reused verbatim for the downloadable file so the two can never disagree.
STRATEGY_MD = """\
1. **Data Quality First**
   Address any missing or malformed dates before further time-series analysis.
2. **Trend & Seasonality**
   Investigate any upward/downward trends and repeating seasonal patterns.
3. **Outlier Management**
   Identify extreme highs/lows in your metric—could be bulk orders or data errors.
4. **Segment-Level Analysis**
   Drill into key dimensions (e.g. region, product) to tailor growth strategies.
5. **Predict & Act**
   Use your 90-day forecasts to guide inventory, staffing, and marketing decisions.
"""
STRATEGY_DOWNLOAD_MD = (
    "# BizIntel AI Ultra – Strategy Recommendations\n\n" + STRATEGY_MD
)


# ── Helpers to read CSV/Excel robustly ───────────────────────────────────────────
@st.cache_data
def load_file(uploaded) -> pd.DataFrame:
    """Read a CSV or Excel file into a DataFrame.

    Args:
        uploaded: File-like object with a ``name`` attribute (as returned by
            ``st.file_uploader``).

    Returns:
        The parsed table.

    Raises:
        ValueError: If the file cannot be parsed; the original exception is
            chained as the cause.
    """
    try:
        if uploaded.name.lower().endswith((".xls", ".xlsx")):
            # Let pandas pick the reader: openpyxl cannot open legacy .xls
            # workbooks, so forcing it would break half of the accepted types.
            return pd.read_excel(uploaded)
        return pd.read_csv(uploaded)
    except Exception as exc:
        raise ValueError(f"Error parsing file: {exc}") from exc


# ── Helpers for SQL database ─────────────────────────────────────────────────────
SUPPORTED_ENGINES = ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"]


@st.cache_data
def list_tables(connection_string: str) -> list:
    """Return the table names visible through ``connection_string``."""
    engine = create_engine(connection_string)
    try:
        # Engine.table_names() was removed in SQLAlchemy 2.0; the inspector
        # is the supported way to list tables.
        return sa_inspect(engine).get_table_names()
    finally:
        # A new engine is built on every cache miss; release its pool.
        engine.dispose()


@st.cache_data
def fetch_table(connection_string: str, table_name: str) -> pd.DataFrame:
    """Load the whole of ``table_name`` into a DataFrame."""
    engine = create_engine(connection_string)
    try:
        return pd.read_sql_table(table_name, engine)
    finally:
        engine.dispose()


# ── Helpers for the time-series section ──────────────────────────────────────────
def _build_time_series(
    df: pd.DataFrame, date_col, metric_col
) -> pd.DataFrame:
    """Return ``metric_col`` indexed by parsed, sorted, de-duplicated dates.

    Works on a copy so the caller's frame is left untouched. Rows whose date
    fails to parse or whose metric is missing are dropped.
    """
    ts = df[[date_col, metric_col]].copy()
    ts[date_col] = pd.to_datetime(ts[date_col], errors="coerce")
    ts = ts.dropna().set_index(date_col).sort_index()
    return ts[~ts.index.duplicated(keep="first")]


def _forecast_frame(
    ts: pd.DataFrame, metric_col, horizon: int = FORECAST_DAYS
) -> pd.DataFrame:
    """Fit ARIMA(1, 1, 1) on ``ts[metric_col]`` and return ``horizon`` daily steps.

    The result has a single ``"forecast"`` column indexed by the ``horizon``
    calendar days following the last observed date.
    """
    fitted = ARIMA(ts[metric_col], order=(1, 1, 1)).fit()
    predicted = fitted.get_forecast(horizon).predicted_mean
    future_idx = pd.date_range(
        ts.index.max(), periods=horizon + 1, freq="D"
    )[1:]
    # Assign by position: the forecast's own index labels need not match
    # ``future_idx``, and label alignment would then yield an all-NaN column.
    return pd.DataFrame({"forecast": np.asarray(predicted)}, index=future_idx)


# ── Streamlit page setup ─────────────────────────────────────────────────────────
st.set_page_config(
    page_title="BizIntel AI Ultra",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")

# ── Data source selection ────────────────────────────────────────────────────────
data_source = st.radio(
    "Select data source",
    ["Upload CSV / Excel", "Connect to SQL Database"],
)

df = None
if data_source == "Upload CSV / Excel":
    uploaded = st.file_uploader(
        "Drag & drop file here (≤ 500 MB)",
        type=["csv", "xls", "xlsx"],
        accept_multiple_files=False,
    )
    if uploaded:
        with st.spinner("Loading file…"):
            try:
                df = load_file(uploaded)
            except ValueError as exc:
                st.error(str(exc))
                st.stop()
        st.success("✅ File loaded into memory")

elif data_source == "Connect to SQL Database":
    db_engine = st.selectbox("Select DB engine", SUPPORTED_ENGINES)
    conn_str = st.text_input(
        "Connection string (SQLAlchemy format)",
        # The chosen engine only tailors the hint; the string is used as typed.
        placeholder=f"e.g. {db_engine}://user:pass@host:port/dbname",
    )
    if conn_str:
        try:
            tables = list_tables(conn_str)
        except (SQLAlchemyError, ImportError) as exc:
            # ImportError covers a missing database driver package.
            st.error(f"Could not list tables: {exc}")
            st.stop()
        if not tables:
            st.warning("No tables found for this connection.")
        table = st.selectbox("Choose table", tables)
        if table:
            with st.spinner(f"Fetching `{table}`…"):
                try:
                    df = fetch_table(conn_str, table)
                except (SQLAlchemyError, ImportError, ValueError) as exc:
                    st.error(f"Could not load `{table}`: {exc}")
                    st.stop()
            st.success(f"✅ `{table}` loaded from database")

# ── If DataFrame is ready, show overview and proceed ─────────────────────────────
if df is not None:
    st.markdown("### 🗂️ Preview")
    st.dataframe(df.head(5), use_container_width=True)

    # Dataset overview metrics
    n_rows, n_cols = df.shape
    total_cells = n_rows * n_cols
    missing_pct = (
        df.isna().sum().sum() / total_cells * 100 if total_cells else 0.0
    )
    st.markdown("---")
    c1, c2, c3 = st.columns(3)
    c1.metric("Rows", f"{n_rows:,}")
    c2.metric("Columns", f"{n_cols:,}")
    c3.metric("Missing %", f"{missing_pct:.1f}%")

    if total_cells == 0:
        # describe() and every chart below need at least one cell of data.
        st.warning("The selected data source contains no data.")
        st.stop()

    # Detailed stats
    st.markdown("#### 📋 Detailed descriptive statistics")
    st.dataframe(
        df.describe(include="all").transpose(),
        use_container_width=True,
    )

    numeric_cols = df.select_dtypes(include="number").columns.tolist()

    # Optional exploratory visuals
    st.markdown("---")
    st.markdown("#### 🔎 Optional Exploratory Visuals")
    col1, col2, col3 = st.columns(3)

    with col1:
        if st.checkbox("Histogram"):
            if numeric_cols:
                col = st.selectbox(
                    "Choose numeric column for histogram",
                    numeric_cols,
                    key="hist",
                )
                fig = px.histogram(
                    df, x=col, nbins=30, title=f"Histogram of {col}"
                )
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("No numeric columns available.")

    with col2:
        if st.checkbox("Scatter matrix"):
            matrix_cols = numeric_cols[:6]  # limit to first 6
            if matrix_cols:
                fig = px.scatter_matrix(
                    df[matrix_cols],
                    dimensions=matrix_cols,
                    title="Scatter Matrix",
                )
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("No numeric columns available.")

    with col3:
        if st.checkbox("Correlation heatmap"):
            if numeric_cols:
                corr = df[numeric_cols].corr()
                fig, ax = plt.subplots(figsize=(6, 5))
                im = ax.imshow(corr, vmin=-1, vmax=1, cmap="RdBu")
                ax.set_xticks(range(len(corr)))
                ax.set_xticklabels(corr.columns, rotation=45, ha="right")
                ax.set_yticks(range(len(corr)))
                ax.set_yticklabels(corr.columns)
                fig.colorbar(im, ax=ax)
                st.pyplot(fig)
                # The script reruns on every interaction; close the figure so
                # open figures do not accumulate across reruns.
                plt.close(fig)
            else:
                st.info("No numeric columns available.")

    # ── Trend & Forecast ─────────────────────────────────────────────────────
    st.markdown("---")
    st.markdown("### 📈 Trend & Forecast")

    # Candidate date columns: real datetimes (any unit, tz-aware or naive)
    # plus object columns, which may hold date strings.
    dt_cols = df.select_dtypes(
        include=["datetime", "datetimetz", "object"]
    ).columns.tolist()

    if not dt_cols or not numeric_cols:
        st.info(
            "Trend analysis needs at least one date/time column "
            "and one numeric column."
        )
    else:
        date_col = st.selectbox("Select date/time column", dt_cols)
        metric_col = st.selectbox("Select numeric metric", numeric_cols)

        ts = _build_time_series(df, date_col, metric_col)

        if ts.empty:
            st.warning(
                f"No rows with a valid date in `{date_col}` "
                f"and a value in `{metric_col}`."
            )
        else:
            # Trend plot
            fig_trend = px.line(
                ts, y=metric_col, title=f"{metric_col} over Time"
            )
            st.plotly_chart(fig_trend, use_container_width=True)

            # Forecast next 90 days with ARIMA
            with st.spinner("Running 90-day forecast…"):
                try:
                    df_f = _forecast_frame(ts, metric_col)
                    fig_fc = px.line(
                        pd.concat([ts, df_f], axis=1),
                        labels={metric_col: metric_col, "forecast": "Forecast"},
                        title=f"{metric_col} & 90-Day Forecast",
                    )
                    st.plotly_chart(fig_fc, use_container_width=True)
                except Exception as exc:
                    # UI boundary: model fitting can fail in many ways (too
                    # few points, non-convergence); report it and keep the
                    # rest of the page alive.
                    st.error(f"Forecast failed: {exc}")

    # ── Strategy Recommendations ─────────────────────────────────────────────
    st.markdown("---")
    st.markdown("### 🚀 Strategy Recommendations")
    st.markdown(STRATEGY_MD)

    # downloadable strategy as markdown
    if not isinstance(st.session_state.get("strategy_md"), str):
        # Keep plain text in session state, never a rendered element.
        st.session_state["strategy_md"] = STRATEGY_DOWNLOAD_MD
    st.download_button(
        "📥 Download Strategy (.md)",
        data=st.session_state["strategy_md"],
        file_name="strategy.md",
        mime="text/markdown",
    )