import streamlit as st
import pandas as pd
import numpy as np
import tempfile
from io import BytesIO
from sqlalchemy import create_engine, inspect
import plotly.express as px
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA

# ── Helpers to read CSV/Excel robustly ──────────────────────────────────────────
def load_file(uploaded):
    """Read an uploaded CSV or Excel file into a DataFrame."""
    try:
        if uploaded.name.lower().endswith((".xls", ".xlsx")):
            return pd.read_excel(uploaded, engine="openpyxl")
        else:
            return pd.read_csv(uploaded)
    except Exception as e:
        st.error(f"Error parsing file: {e}")
        st.stop()

# ── Helpers for SQL databases ───────────────────────────────────────────────────
SUPPORTED_ENGINES = ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"]

def list_tables(connection_string):
    """Return the table names available on the target database."""
    engine = create_engine(connection_string)
    return inspect(engine).get_table_names()

def fetch_table(connection_string, table_name):
    """Load an entire table into a DataFrame."""
    engine = create_engine(connection_string)
    return pd.read_sql_table(table_name, engine)
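
# Example SQLAlchemy URLs for the engines above (illustrative only; swap in your
# own host, port, credentials, and driver):
#   postgresql://user:password@host:5432/dbname
#   mysql://user:password@host:3306/dbname
#   mssql+pyodbc://user:password@dsn_name
#   oracle+cx_oracle://user:password@host:1521/?service_name=orcl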

# ── Streamlit page setup ────────────────────────────────────────────────────────
st.set_page_config(
    page_title="BizIntel AI Ultra",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.title("BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")

# ── Data source selection ───────────────────────────────────────────────────────
data_source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])

df = None

if data_source == "Upload CSV / Excel":
    uploaded = st.file_uploader(
        "Drag & drop file here (≤ 500 MB)",
        type=["csv", "xls", "xlsx"],
        accept_multiple_files=False,
    )
    if uploaded:
        with st.spinner("Loading file…"):
            df = load_file(uploaded)
        st.success("✅ File loaded into memory")
elif data_source == "Connect to SQL Database":
    engine = st.selectbox("Select DB engine", SUPPORTED_ENGINES)
    conn_str = st.text_input(
        "Connection string (SQLAlchemy format)",
        placeholder="e.g. postgresql://user:pass@host:port/dbname",
    )
    if conn_str:
        tables = list_tables(conn_str)
        table = st.selectbox("Choose table", tables)
        if table:
            with st.spinner(f"Fetching `{table}`…"):
                df = fetch_table(conn_str, table)
            st.success(f"✅ `{table}` loaded from database")

# ── If a DataFrame is ready, show an overview and proceed ───────────────────────
if df is not None:
    st.markdown("### Preview")
    st.dataframe(df.head(5), use_container_width=True)

    # Dataset overview metrics
    n_rows, n_cols = df.shape
    missing_pct = (df.isna().sum().sum() / (n_rows * n_cols)) * 100
    st.markdown("---")
    c1, c2, c3 = st.columns(3)
    c1.metric("Rows", f"{n_rows:,}")
    c2.metric("Columns", f"{n_cols:,}")
    c3.metric("Missing %", f"{missing_pct:.1f}%")

    # Detailed stats
    st.markdown("#### Detailed descriptive statistics")
    st.dataframe(df.describe(include="all").transpose(), use_container_width=True)

    # Optional exploratory visuals
    st.markdown("---")
    st.markdown("#### Optional Exploratory Visuals")
    col1, col2, col3 = st.columns(3)

    with col1:
        if st.checkbox("Histogram"):
            num_cols = df.select_dtypes(include="number").columns.tolist()
            col = st.selectbox("Choose numeric column for histogram", num_cols, key="hist")
            fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
            st.plotly_chart(fig, use_container_width=True)

    with col2:
        if st.checkbox("Scatter matrix"):
            num_cols = df.select_dtypes(include="number").columns.tolist()[:6]  # limit to first 6
            fig = px.scatter_matrix(df[num_cols], dimensions=num_cols, title="Scatter Matrix")
            st.plotly_chart(fig, use_container_width=True)

    with col3:
        if st.checkbox("Correlation heatmap"):
            corr = df.select_dtypes(include="number").corr()
            fig, ax = plt.subplots(figsize=(6, 5))
            im = ax.imshow(corr, vmin=-1, vmax=1, cmap="RdBu")
            ax.set_xticks(range(len(corr)))
            ax.set_xticklabels(corr.columns, rotation=45, ha="right")
            ax.set_yticks(range(len(corr)))
            ax.set_yticklabels(corr.columns)
            fig.colorbar(im, ax=ax)
            st.pyplot(fig)

    # ── Trend & Forecast ────────────────────────────────────────────────────────
    st.markdown("---")
    st.markdown("### Trend & Forecast")

    # Pick the date/time column
    dt_cols = df.columns[df.dtypes.isin([np.dtype("datetime64[ns]"), np.dtype("object")])].tolist()
    date_col = st.selectbox("Select date/time column", dt_cols)
    df[date_col] = pd.to_datetime(df[date_col], errors="coerce")

    # Pick the numeric metric
    num_cols = df.select_dtypes(include="number").columns.tolist()
    metric_col = st.selectbox("Select numeric metric", num_cols)

    # Prepare the time series
    ts = df[[date_col, metric_col]].dropna()
    ts = ts.set_index(date_col).sort_index()
    ts = ts[~ts.index.duplicated(keep="first")]

    # Trend plot
    fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time")
    st.plotly_chart(fig_trend, use_container_width=True)

    # Forecast the next 90 days with ARIMA
    with st.spinner("Running 90-day forecast…"):
        try:
            model = ARIMA(ts[metric_col], order=(1, 1, 1)).fit()
            fcast = model.get_forecast(90)
            idx = pd.date_range(ts.index.max(), periods=91, freq="D")[1:]
            df_f = pd.DataFrame({"forecast": fcast.predicted_mean.to_numpy()}, index=idx)
            fig_fc = px.line(
                pd.concat([ts, df_f], axis=1),
                labels={metric_col: metric_col, "forecast": "Forecast"},
                title=f"{metric_col} & 90-Day Forecast",
            )
            st.plotly_chart(fig_fc, use_container_width=True)
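
            # Optional sketch (assumption, not in the original app): get_forecast()
            # also returns a confidence interval; surfacing it lets readers judge how
            # much to trust the point forecast. conf_int() columns are (lower, upper).
            ci = fcast.conf_int(alpha=0.05)
            ci.index = idx
            ci.columns = ["lower", "upper"]
            st.dataframe(ci.tail(10), use_container_width=True)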
        except Exception as e:
            st.error(f"Forecast failed: {e}")

    # ── Strategy Recommendations ────────────────────────────────────────────────
    st.markdown("---")
    st.markdown("### Strategy Recommendations")
    st.markdown(
        """
1. **Data Quality First**
   Address any missing or malformed dates before further time-series analysis.
2. **Trend & Seasonality**
   Investigate any upward/downward trends and repeating seasonal patterns.
3. **Outlier Management**
   Identify extreme highs/lows in your metric; these could be bulk orders or data errors.
4. **Segment-Level Analysis**
   Drill into key dimensions (e.g. region, product) to tailor growth strategies.
5. **Predict & Act**
   Use your 90-day forecasts to guide inventory, staffing, and marketing decisions.
"""
    )
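
    # Illustrative sketch (assumption, not in the original app): flag extreme values
    # in the selected metric with the 1.5 × IQR rule as a concrete starting point for
    # recommendation 3 (Outlier Management).
    q1, q3 = df[metric_col].quantile([0.25, 0.75])
    iqr = q3 - q1
    outliers = df[(df[metric_col] < q1 - 1.5 * iqr) | (df[metric_col] > q3 + 1.5 * iqr)]
    st.caption(f"Potential outliers in {metric_col} (1.5 × IQR rule): {len(outliers)} rows")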

    # Downloadable copy of the strategy as Markdown
    strategy_md = """# BizIntel AI Ultra – Strategy Recommendations

1. Data Quality First: address any missing or malformed dates before further time-series analysis.
2. Trend & Seasonality: investigate any upward/downward trends and repeating seasonal patterns.
3. Outlier Management: identify extreme highs/lows in your metric; these could be bulk orders or data errors.
4. Segment-Level Analysis: drill into key dimensions (e.g. region, product) to tailor growth strategies.
5. Predict & Act: use your 90-day forecasts to guide inventory, staffing, and marketing decisions.
"""
    st.session_state["strategy_md"] = strategy_md
    st.download_button(
        "Download Strategy (.md)",
        data=strategy_md,
        file_name="strategy.md",
        mime="text/markdown",
    )
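
# To run this dashboard locally (assuming the file is saved as app.py):
#   streamlit run app.py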