Spaces:
Sleeping
Sleeping
# app.py - BizIntel AI Ultra v2
# =============================================================
# CSV / Excel / DB ingestion • Trend + ARIMA forecast (90 d or 3 steps)
# Confidence bands • Model explainability • Gemini 1.5 Pro strategy
# Safe Plotly writes -> /tmp • KPI cards • Optional EDA visuals
# =============================================================
import os, tempfile, warnings | |
from typing import List | |
import numpy as np | |
import pandas as pd | |
import streamlit as st | |
import plotly.graph_objects as go | |
from statsmodels.tsa.arima.model import ARIMA | |
from statsmodels.graphics.tsaplots import plot_acf | |
from statsmodels.tsa.seasonal import seasonal_decompose | |
from statsmodels.tools.sm_exceptions import ConvergenceWarning | |
import google.generativeai as genai | |
import matplotlib.pyplot as plt | |
# --------------------------------------------------------------
# 0) Plotly safe write -> /tmp
# --------------------------------------------------------------
# Redirect every Plotly static-image export into the system temp directory:
# only the file-name component of the requested path is kept.
TMP = tempfile.gettempdir()

# Streamlit re-executes this script on every interaction while the plotly
# module stays imported. Wrapping whatever `write_image` currently is would
# therefore wrap the previous run's wrapper and grow the call chain without
# bound. Recover the pristine method from the marker a previous run left.
_current_write = go.Figure.write_image
orig_write = getattr(_current_write, "_unpatched", _current_write)


def _write_image_to_tmp(self, p, *a, **k):
    """Write the figure under TMP using only the base name of ``p``."""
    return orig_write(self, os.path.join(TMP, os.path.basename(p)), *a, **k)


# Remember the pristine method so the next script run can unwrap to it.
_write_image_to_tmp._unpatched = orig_write
go.Figure.write_image = _write_image_to_tmp
# --------------------------------------------------------------
# 1) Local helpers & DB connector
# --------------------------------------------------------------
from tools.csv_parser import parse_csv_tool | |
from tools.plot_generator import plot_metric_tool | |
from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool | |
from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES | |
# --------------------------------------------------------------
# 2) Gemini 1.5 Pro
# --------------------------------------------------------------
# Configure the Gemini client from the GEMINI_APIKEY environment variable and
# build the model handle that the strategy-report section calls later.
_gemini_api_key = os.getenv("GEMINI_APIKEY")
genai.configure(api_key=_gemini_api_key)

_gemini_generation_config = {
    "temperature": 0.7,
    "top_p": 0.9,
    "response_mime_type": "text/plain",
}
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config=_gemini_generation_config,
)
# --------------------------------------------------------------
# 3) Streamlit setup
# --------------------------------------------------------------
# Page chrome: wide layout plus the main title.
st.set_page_config(
    layout="wide",
    page_title="BizIntelย AIย Ultra",
)
st.title(
    "๐ย BizIntelย AIย Ultraย โ Advanced Analyticsย +ย Geminiย 1.5ย Pro"
)
# --------------------------------------------------------------
# 4) Data source
# --------------------------------------------------------------
# Resolve the chosen data source (file upload or SQL table) to a CSV file on
# disk; everything below this section works from `csv_path`.
choice = st.radio("Select data source", ["Upload CSVย /ย Excel", "Connect to SQL Database"])
csv_path: str | None = None

if choice.startswith("Upload"):
    up = st.file_uploader("CSVย orย Excelย (โคโฏ500โฏMB)", type=["csv", "xlsx", "xls"])
    if up:
        # The upload name comes from the client: keep only its file-name
        # component so it cannot point outside the temp directory.
        tmp = os.path.join(TMP, os.path.basename(up.name))
        with open(tmp, "wb") as f:
            f.write(up.read())
        if up.name.lower().endswith(".csv"):
            csv_path = tmp
        else:
            # Excel input: convert the first sheet to CSV next to the upload.
            try:
                pd.read_excel(tmp, sheet_name=0).to_csv(tmp + ".csv", index=False)
                csv_path = tmp + ".csv"
            except Exception as e:
                st.error(f"Excel parse failed: {e}")
else:
    # NOTE(review): the selected engine is not passed to any call in this
    # section - confirm whether db_connector is expected to use it.
    eng = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn = st.text_input("SQLAlchemyย connection string")
    if conn:
        try:
            tbl = st.selectbox("Table", list_tables(conn))
            if st.button("Fetch table"):
                # st.button is True only on the run triggered by the click.
                # Persist the result so later reruns (any widget change)
                # still have a working CSV instead of hitting st.stop().
                st.session_state["db_fetch"] = (conn, tbl, fetch_data_from_db(conn, tbl))
                st.success(f"Fetched **{tbl}**")
            fetched = st.session_state.get("db_fetch")
            # Only reuse the stored file for the same connection and table.
            if fetched and fetched[:2] == (conn, tbl):
                csv_path = fetched[2]
        except Exception as e:
            st.error(f"DB error: {e}")

# Nothing to analyse yet: halt this script run here.
if not csv_path:
    st.stop()

with open(csv_path, "rb") as f:
    st.download_button("โฌ๏ธย Download working CSV", f, file_name=os.path.basename(csv_path))
# --------------------------------------------------------------
# 5) Column selectors
# --------------------------------------------------------------
# Preview the first five rows and let the user choose the time axis and the
# metric; the numeric candidates are taken from that preview only.
df_head = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_head)

date_col = st.selectbox("Date/time column", df_head.columns)

numeric_cols = df_head.select_dtypes("number").columns.tolist()
metric_options = [name for name in numeric_cols if name != date_col]
if len(metric_options) == 0:
    # No numeric column left to forecast: report and end this run.
    st.error("No numeric columns available apart from the date column.")
    st.stop()

metric_col = st.selectbox("Numeric metric column", metric_options)
# --------------------------------------------------------------
# 6) Summary & trend chart
# --------------------------------------------------------------
# Dataset summary (fed to the Gemini prompt later) and the trend chart.
summary = parse_csv_tool(csv_path)
trend_fig = plot_metric_tool(csv_path, date_col, metric_col)

if not isinstance(trend_fig, go.Figure):
    # The plotting helper returned something other than a figure; show it
    # to the user as a warning.
    st.warning(trend_fig)
else:
    st.subheader("๐ย Trend")
    st.plotly_chart(trend_fig, use_container_width=True)
# --------------------------------------------------------------
# 7) Robust ARIMA + explainability
# --------------------------------------------------------------
def build_series(path, dcol, vcol):
    """Load a regularly spaced, gap-free time series from a CSV source.

    Args:
        path: CSV path or file-like object accepted by ``pd.read_csv``.
        dcol: Name of the date/time column.
        vcol: Name of the numeric value column.

    Returns:
        ``(series, freq)``: the series indexed by timestamp, resampled to
        ``freq`` with gaps linearly interpolated, and the frequency alias
        used (the inferred one, or ``"D"`` when none could be inferred).

    Raises:
        ValueError: If fewer than two distinct valid timestamps remain after
            dropping rows whose date or value cannot be parsed.
    """
    df = pd.read_csv(path, usecols=[dcol, vcol])
    # Unparseable dates/values become NaT/NaN and are dropped below.
    df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
    df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
    df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
    if df.empty or df[dcol].nunique() < 2:
        raise ValueError("Need โฅโฏ2 valid timestamps.")
    # Duplicate timestamps are collapsed to their mean value.
    s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
    # pd.infer_freq raises ValueError for fewer than three timestamps, which
    # the guard above still allows; treat that like "could not infer".
    try:
        freq = pd.infer_freq(s.index)
    except ValueError:
        freq = None
    freq = freq or "D"
    s = s.asfreq(freq).interpolate()
    return s, freq
def fit_arima(series, order=(1, 1, 1)):
    """Fit an ARIMA model to ``series`` and return the fitted results object.

    Args:
        series: The time series to model.
        order: ARIMA ``(p, d, q)`` order. Defaults to ``(1, 1, 1)``, the
            order this function previously hard-coded.

    Returns:
        The object returned by ``ARIMA(...).fit()``.
    """
    # Silence optimizer convergence warnings. This installs a process-wide
    # "ignore" filter for ConvergenceWarning.
    warnings.simplefilter("ignore", ConvergenceWarning)
    model = ARIMA(series, order=order)
    return model.fit()
# Build the series, fit the model and produce the forecast with its
# confidence interval. Any failure is reported to the user and leaves
# `series`, `forecast` and `ci` set to None.
try:
    series, freq = build_series(csv_path, date_col, metric_col)
    # Daily data is forecast 90 steps ahead; any other frequency 3 steps.
    horizon = 3 if freq != "D" else 90
    res = fit_arima(series)
    fc = res.get_forecast(steps=horizon)
    forecast, ci = fc.predicted_mean, fc.conf_int()
except Exception as e:
    st.subheader(f"๐ฎย {metric_col}ย Forecast")
    st.warning(f"Forecast failed: {e}")
    series = forecast = ci = None
# Everything below needs a fitted model, so it is skipped when the forecast
# step failed.
if forecast is not None:
    # ---------------- Forecast plot with confidence band ----------------
    fig = go.Figure()
    fig.add_scatter(x=series.index, y=series, mode="lines", name=metric_col)
    fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
    # Upper bound first, then the lower bound filled up to it ("tonexty").
    fig.add_scatter(x=ci.index, y=ci.iloc[:, 1], mode="lines",
                    line=dict(width=0), showlegend=False)
    fig.add_scatter(x=ci.index, y=ci.iloc[:, 0], mode="lines",
                    line=dict(width=0), fill="tonexty",
                    fillcolor="rgba(255,0,0,0.25)", showlegend=False)
    fig.update_layout(title=f"{metric_col} Forecast ({horizon}ย steps)",
                      template="plotly_dark", xaxis_title=date_col,
                      yaxis_title=metric_col)
    st.subheader(f"๐ฎย {metric_col}ย Forecast")
    st.plotly_chart(fig, use_container_width=True)

    # ---------------- Summary & interpretation ----------------
    st.subheader("๐ย Model Summary")
    st.code(res.summary().as_text(), language="text")
    st.subheader("๐ย Coefficient Interpretation")
    ar = res.arparams
    ma = res.maparams
    interp: List[str] = []
    if ar.size:
        interp.append(f"โขย AR(1)ย ={ar[0]:.2f} โ "
                      f"{'strong' if abs(ar[0]) > 0.5 else 'moderate'} "
                      "persistence in the series.")
    if ma.size:
        interp.append(f"โขย MA(1)ย ={ma[0]:.2f} โ "
                      f"{'large' if abs(ma[0]) > 0.5 else 'modest'} "
                      "shock adjustment.")
    # Blank line between entries: a single newline does not start a new
    # line in Markdown, so the two bullets would run together.
    st.markdown("\n\n".join(interp) or "N/A")

    # ---------------- Residual ACF ----------------
    st.subheader("๐ย Residual Autocorrelation (ACF)")
    resid = res.resid.dropna()
    # Draw on an explicit axes: without `ax`, plot_acf opens its own figure
    # and a separately created sized figure would never be closed.
    acf_fig, acf_ax = plt.subplots(figsize=(6, 3))
    try:
        # Never request more lags than the residuals can provide.
        acf_lags = min(30, len(resid) - 1)
        plot_acf(resid, lags=acf_lags, alpha=0.05, ax=acf_ax)
        acf_png = os.path.join(TMP, "acf.png")
        acf_fig.tight_layout()
        acf_fig.savefig(acf_png, dpi=120)
    finally:
        plt.close(acf_fig)
    st.image(acf_png, use_container_width=True)

    # ---------------- Back-test ----------------
    st.subheader("๐งชย Backโtest (last 20โฏ%)")
    mape = rmse = None
    try:
        # Hold out the last 20 % of the series, but at least 10 points.
        k = max(int(len(series) * 0.2), 10)
        if k >= len(series):
            raise ValueError("series too short for a hold-out split")
        train, test = series.iloc[:-k], series.iloc[-k:]
        bt_res = fit_arima(train)
        bt_pred = bt_res.forecast(k)
        errors = bt_pred - test
        # MAPE is relative to |actual|; zero actuals are excluded so they
        # cannot turn the mean into inf.
        nonzero = test != 0
        mape = (errors.abs()[nonzero] / test[nonzero].abs()).mean() * 100
        rmse = np.sqrt((errors ** 2).mean())
    except Exception as e:
        st.warning(f"Back-test failed: {e}")
    else:
        colA, colB = st.columns(2)
        colA.metric("MAPE", f"{mape:.2f}ย %" if np.isfinite(mape) else "N/A")
        colB.metric("RMSE", f"{rmse:,.0f}")

    # ---------------- Optional seasonal decomposition -------
    with st.expander("Seasonal Decomposition"):
        try:
            # Both old ("H", "M") and newer ("h", "ME") pandas aliases are
            # accepted, plus month-start ("MS").
            period = {
                "D": 7,
                "H": 24, "h": 24,
                "M": 12, "ME": 12, "MS": 12,
            }.get(freq, None)
            if period:
                dec = seasonal_decompose(series, period=period, model="additive")
                for comp in ["trend", "seasonal", "resid"]:
                    st.line_chart(getattr(dec, comp), height=150)
            else:
                st.info("Frequency not suited for decomposition.")
        except Exception as e:
            st.info(f"Decomposition failed: {e}")
# --------------------------------------------------------------
# 8) Gemini strategy report
# --------------------------------------------------------------
# Ask Gemini for a Markdown strategy report built from the dataset summary
# and the forecast (or "N/A" when forecasting failed).
prompt = (
    "You are **BizIntel Strategist AI**.\n\n"
    f"### Dataset Summary\n```\n{summary}\n```\n\n"
    f"### {metric_col} Forecast\n```\n"
    f"{forecast.to_string() if forecast is not None else 'N/A'}\n```\n\n"
    "Craft a Markdown report:\n"
    "1. Five insights\n2. Three actionable strategies\n"
    "3. Risksย / anomalies\n4. Extra visuals to consider."
)

md = None
# The script reruns on every widget interaction; reuse the previous report
# while the prompt is unchanged instead of calling the model again.
previous_report = st.session_state.get("gemini_report")
if previous_report and previous_report[0] == prompt:
    md = previous_report[1]
else:
    with st.spinner("Gemini generating strategyโฆ"):
        try:
            md = gemini.generate_content(prompt).text
            st.session_state["gemini_report"] = (prompt, md)
        except Exception as e:
            # A failed request must not take down the rest of the page.
            st.error(f"Gemini request failed: {e}")

st.subheader("๐ย Strategyย Recommendationsย (Geminiย 1.5ย Pro)")
if md:
    st.markdown(md)
    st.download_button("โฌ๏ธย Downloadย Strategy (.md)", md, file_name="strategy.md")
# --------------------------------------------------------------
# 9) KPI cards + detailed stats + optional EDA (unchanged)
# --------------------------------------------------------------
# Dataset-wide KPI cards, descriptive statistics and opt-in EDA charts,
# all computed from the full CSV.
fulldf = pd.read_csv(csv_path, low_memory=False)
rows, cols = fulldf.shape
# Mean of the per-column missing fractions, as a percentage.
miss_pct = fulldf.isna().mean().mean() * 100

st.markdown("---")
st.subheader("๐ย Datasetย Overview")
c1, c2, c3 = st.columns(3)
for card, label, value in (
    (c1, "Rows", f"{rows:,}"),
    (c2, "Columns", cols),
    (c3, "Missingย %", f"{miss_pct:.1f}%"),
):
    card.metric(label, value)

with st.expander("Descriptiveย Statistics"):
    styled_stats = (
        fulldf.describe().T.style
        .format(precision=2)
        .background_gradient("Blues")
    )
    st.dataframe(styled_stats, use_container_width=True)

st.markdown("---")
st.subheader("๐ย Optionalย Exploratoryย Visuals")
num_cols = fulldf.select_dtypes("number").columns.tolist()

if st.checkbox("Histogram"):
    hist_var = st.selectbox("Var", num_cols, key="hist")
    hist_fig = histogram_tool(csv_path, hist_var)
    st.plotly_chart(hist_fig, use_container_width=True)

if st.checkbox("Scatterย Matrix"):
    sel = st.multiselect("Columns", num_cols, default=num_cols[:3])
    if sel:
        matrix_fig = scatter_matrix_tool(csv_path, sel)
        st.plotly_chart(matrix_fig, use_container_width=True)

if st.checkbox("Correlationย Heatโmap"):
    heatmap_fig = corr_heatmap_tool(csv_path)
    st.plotly_chart(heatmap_fig, use_container_width=True)