Spaces:
Sleeping
Sleeping
File size: 6,639 Bytes
b5d6aaa dc51ef8 b5d6aaa 5f67bb9 b5d6aaa 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 0a40e29 5f67bb9 0a40e29 5f67bb9 29aad45 b5d6aaa dc51ef8 b5d6aaa dc51ef8 b5d6aaa dc51ef8 5f67bb9 dc51ef8 b5d6aaa 5f67bb9 dc51ef8 b5d6aaa dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 5f67bb9 dc51ef8 b5d6aaa dc51ef8 b5d6aaa 5f67bb9 b5d6aaa 8a0173b dc51ef8 b5d6aaa 5f67bb9 b5d6aaa 5f67bb9 b5d6aaa 5f67bb9 b5d6aaa dc51ef8 b5d6aaa 5f67bb9 b5d6aaa 5f67bb9 b5d6aaa dc51ef8 b5d6aaa dc51ef8 b5d6aaa 5f67bb9 b5d6aaa 5f67bb9 b5d6aaa 0a40e29 b5d6aaa dc51ef8 b5d6aaa dc51ef8 0a40e29 b5d6aaa 5f67bb9 b5d6aaa dc51ef8 b5d6aaa 5f67bb9 b5d6aaa dc51ef8 b5d6aaa 5f67bb9 b5d6aaa 5f67bb9 b5d6aaa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
"""app.py โ BizIntel AI Ultra (Geminiโonly, v4)
A production-grade BI copilot with:
• CSV / Excel / Parquet and SQL ingestion
• Smart sampling + memory-safe loading for large files
• Schema + missing-data audit with Gemini-generated insights
• Drill-down EDA (histogram, violin, scatter-matrix, heatmap)
• Auto-detected date column, tunable ARIMA forecasting
• One-click strategy brief download (Markdown)
"""
from __future__ import annotations
import os, io, tempfile
from pathlib import Path
from typing import List
import pandas as pd
import streamlit as st
import plotly.express as px
from statsmodels.tsa.arima.model import ARIMA
from sqlalchemy import create_engine
import google.generativeai as genai
# ───────────────────── 0 · CONFIGURATION ─────────────────────
# Configure the page before anything else is rendered: Streamlit expects
# set_page_config to be the first Streamlit command on a page, and the
# missing-key branch below draws an error message.
st.set_page_config("BizIntelย AIย Ultra", "๐", "wide", initial_sidebar_state="expanded")

# Key lookup order: Streamlit secrets first, then the process environment.
try:
    API_KEY = st.secrets.get("GEMINI_APIKEY")
except FileNotFoundError:
    # Reading st.secrets can raise when no secrets file exists at all; that
    # must not prevent the environment-variable fallback from being tried.
    API_KEY = None
API_KEY = API_KEY or os.getenv("GEMINI_APIKEY")

if not API_KEY:
    st.error("โ Missing `GEMINI_APIKEY` โ add it in Settings โ Secrets or set env variable.")
    st.stop()

genai.configure(api_key=API_KEY)
GEM_MODEL = "gemini-1.5-pro-latest"  # model name passed to genai.GenerativeModel
TMP = Path(tempfile.gettempdir())  # system temporary directory
# ───────────────────── 1 · UTILITY HELPERS ───────────────────
@st.cache_data(show_spinner=False)
def read_file(buf: io.BufferedReader, sample: bool = False) -> pd.DataFrame:
    """Load an uploaded CSV, Excel, or Parquet file into a DataFrame.

    The parser is picked from the extension of ``buf.name``; anything that is
    neither Excel nor Parquet is read as CSV.

    Args:
        buf: File-like object exposing a ``name`` attribute.
        sample: When true, cap CSV reads at 5,000,000 rows. Has no effect on
            Excel or Parquet input.

    Returns:
        The parsed DataFrame.
    """
    suf = Path(buf.name).suffix.lower()
    if suf in {".xls", ".xlsx"}:
        # openpyxl only understands the .xlsx family; for legacy .xls let
        # pandas choose the engine itself instead of forcing openpyxl.
        engine = "openpyxl" if suf == ".xlsx" else None
        return pd.read_excel(buf, engine=engine)
    if suf == ".parquet":
        return pd.read_parquet(buf)
    return pd.read_csv(buf, nrows=5_000_000 if sample else None)
@st.cache_data(show_spinner=False)
def sql_tables(uri: str) -> List[str]:
    """Return the table names available in the database at ``uri``.

    Args:
        uri: SQLAlchemy connection URI.

    Returns:
        Table names as reported by the SQLAlchemy inspector.
    """
    # Local import keeps this block self-contained; sqlalchemy is already a
    # dependency of this file.
    from sqlalchemy import inspect

    engine = create_engine(uri)
    try:
        # Engine.table_names() no longer exists in SQLAlchemy 2.x; the
        # inspector API works on both 1.4 and 2.x.
        return inspect(engine).get_table_names()
    finally:
        # The engine is only needed for this lookup; release its connections.
        engine.dispose()
@st.cache_data(show_spinner=True)
def read_table(uri: str, tbl: str) -> pd.DataFrame:
    """Read one whole SQL table into a DataFrame.

    Args:
        uri: SQLAlchemy connection URI.
        tbl: Name of the table to load.

    Returns:
        The table contents as a DataFrame.
    """
    engine = create_engine(uri)
    frame = pd.read_sql_table(tbl, engine)
    return frame
@st.cache_data(show_spinner=False)
def ask_gemini(prompt: str) -> str:
    """Send ``prompt`` to the configured Gemini model and return its text.

    Args:
        prompt: Full prompt text.

    Returns:
        The response text with surrounding whitespace stripped.
    """
    model = genai.GenerativeModel(GEM_MODEL)
    response = model.generate_content(prompt)
    answer = response.text
    return answer.strip()
# ───────────────────── 2 · DATA INGESTION ────────────────────
st.title("๐ BizIntel AI Ultra โ Gemini 1.5 Pro BI Copilot")
mode = st.sidebar.radio("Select Data Source", ["Upload File", "SQL Database"], horizontal=True)

# Start from an empty frame so the "nothing loaded yet" check below works for
# both sources.
df: pd.DataFrame = pd.DataFrame()

if mode == "Upload File":
    upl = st.sidebar.file_uploader(
        "Upload CSV / Excel / Parquet",
        ["csv", "xls", "xlsx", "parquet"],
        help="โค2โฏGB",
    )
    sample = st.sidebar.checkbox("Load sample (โค 5M rows)")
    if upl:
        df = read_file(upl, sample)
else:
    uri = st.sidebar.text_input("SQLAlchemy URI")
    if uri:
        tbl = st.sidebar.selectbox("Choose Table", sql_tables(uri))
        if tbl:
            df = read_table(uri, tbl)

# Halt the script run until the user has supplied data.
if df.empty:
    # This literal was split across two lines in the source (a syntax
    # error); it is restored to a single-line string.
    st.info("⬅️ Load a dataset to get started.")
    st.stop()

# Same repair as above: the literal was broken across two lines.
st.success("✅ Data loaded")
st.dataframe(df.head(), use_container_width=True)
# ───────────────────── 3 · SUMMARY + GEMINI ──────────────────
rows, cols = df.shape
# Share of missing cells over the whole frame; df is known to be non-empty
# here, so rows * cols is non-zero.
miss_pct = df.isna().sum().sum() / (rows * cols) * 100

c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing %", f"{miss_pct:.1f}")

st.subheader("๐ง Gemini Insights")
with st.spinner("Generating analysis..."):
    try:
        stats = df.describe(include="all", datetime_is_numeric=True)
    except TypeError:
        # pandas 2.x dropped the datetime_is_numeric keyword; retry without
        # it so the summary works on both old and new pandas.
        stats = df.describe(include="all")
    summary = stats.round(2).to_json()
    st.markdown(ask_gemini(
        "You are a senior BI analyst. List 5 key insights and 3 action items based on this dataset: " + summary
    ))
# ───────────────────── 4 · TIME SERIES SETUP ─────────────────
# Try to parse text-like columns as datetimes. Numeric columns are skipped on
# purpose: pd.to_datetime accepts plain numbers (as epoch offsets), which
# would turn every metric column into a timestamp and leave nothing to chart.
for c in df.columns:
    column = df[c]
    if pd.api.types.is_datetime64_any_dtype(column) or pd.api.types.is_numeric_dtype(column):
        continue
    try:
        df[c] = pd.to_datetime(column)
    except (ValueError, TypeError, OverflowError):
        # Not parseable as dates; keep the column unchanged.
        continue

date_candidates = [c for c in df.columns if pd.api.types.is_datetime64_any_dtype(df[c])]
metric_candidates = list(df.select_dtypes("number").columns)

DATE_COL = st.selectbox("Date column", date_candidates)
METRIC_COL = st.selectbox("Numeric metric", metric_candidates)

# With an empty option list the selectbox yields None; stop with a message
# instead of failing on the column lookup below.
if DATE_COL is None or METRIC_COL is None:
    st.warning("Time-series analysis needs at least one date column and one numeric column.")
    st.stop()

# One value per timestamp: the mean of the metric, in chronological order.
series = (
    df[[DATE_COL, METRIC_COL]].dropna()
    .groupby(DATE_COL)[METRIC_COL].mean().sort_index()
)
fig_ts = px.line(series, title=f"{METRIC_COL} Trend", labels={"index": "Date", METRIC_COL: METRIC_COL})
st.plotly_chart(fig_ts, use_container_width=True)
# ───────────────────── 5 · ARIMA FORECASTING ─────────────────
st.subheader("๐ฎ Forecast")
steps = st.slider("Forecast Horizon", 3, 365, 90)
p = st.number_input("AR Order (p)", 0, 5, 1)
d = st.number_input("Diff Order (d)", 0, 2, 1)
q = st.number_input("MA Order (q)", 0, 5, 1)

with st.spinner("Training ARIMA model..."):
    model = ARIMA(series, order=(p, d, q)).fit()
    predicted = model.forecast(steps)

# pd.infer_freq raises on very short or non-datetime indexes; fall back to
# daily steps in that case, as is already done when no frequency is found.
try:
    freq = pd.infer_freq(series.index)
except (TypeError, ValueError):
    freq = None

# Generate steps + 1 timestamps starting at the last observation, then drop
# the first so the forecast begins one period after the data ends.
fut_idx = pd.date_range(series.index[-1], periods=steps + 1, freq=freq or "D")[1:]

# Use the raw forecast values: building a Series from another Series with
# index= re-aligns on labels, which produces NaN wherever the model's own
# index differs from fut_idx.
forecast = pd.Series(pd.Series(predicted).to_numpy(), index=fut_idx)

fig_fc = px.line(pd.concat([series, forecast.rename("Forecast")], axis=1), title="Actual vs Forecast")
st.plotly_chart(fig_fc, use_container_width=True)
# ───────────────────── 6 · EDA TOOLS ─────────────────────────
st.subheader("๐ Exploratory Data Dashboard")

with st.expander("Histogram + Box"):
    # Offer every numeric column, pre-selecting the metric chosen for the
    # time series. Passing the bare METRIC_COL string as the option list is
    # not a list of choices.
    numeric_cols = list(df.select_dtypes("number").columns)
    default_pos = numeric_cols.index(METRIC_COL) if METRIC_COL in numeric_cols else 0
    col = st.selectbox("Metric column", numeric_cols, index=default_pos, key="hist")
    st.plotly_chart(px.histogram(df, x=col, marginal="box", template="plotly_dark"), use_container_width=True)

with st.expander("Correlation Heatmap"):
    # Pairwise correlation across numeric columns only.
    corr = df.select_dtypes("number").corr()
    st.plotly_chart(px.imshow(corr, color_continuous_scale="RdBu", aspect="auto", title="Correlation Matrix"), use_container_width=True)
# ───────────────────── 7 · STRATEGY DOWNLOAD ─────────────────
# Static Markdown brief: a heading followed by one bullet per recommendation,
# joined with newlines (no trailing newline).
brief_lines = [
    "# Strategy Brief",
    "* Clean missing date values for better time modeling.",
    "* Investigate top correlations for potential drivers.",
    "* Leverage forecast for inventory and staff planning.",
    "* Watch for outliers >3ฯ weekly.",
    "* Segment by region and product for precise actions.",
]
brief = "\n".join(brief_lines)

st.download_button(
    "โฌ๏ธ Download Strategy (.md)",
    brief,
    file_name="bizintel_brief.md",
    mime="text/markdown",
)
|