Spaces:

mgbam
/

BizIntel_AI

Sleeping

App Files Files Community

BizIntel_AI / app.py

mgbam

Update app.py

0a40e29 verified about 1 month ago

raw

history blame

8.03 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import tempfile
	from io import BytesIO
	from sqlalchemy import create_engine
	import plotly.express as px
	import matplotlib.pyplot as plt
	from statsmodels.tsa.arima.model import ARIMA

	# ── Helpers to read CSV/Excel robustly ───────────────────────────────────────────
	@st.cache_data
	def load_file(uploaded):
	    """Read an uploaded CSV or Excel file into a DataFrame.

	    Args:
	        uploaded: File-like object exposing a ``name`` attribute (here, the
	            value returned by ``st.file_uploader``). The extension of
	            ``name`` selects the parser.

	    Returns:
	        The parsed ``pandas.DataFrame``.

	    Raises:
	        ValueError: If pandas fails to parse the file; the underlying
	            exception is chained as the cause.
	    """
	    filename = uploaded.name.lower()
	    try:
	        if filename.endswith(".xlsx"):
	            return pd.read_excel(uploaded, engine="openpyxl")
	        if filename.endswith(".xls"):
	            # openpyxl only reads the newer XML-based formats; leave the
	            # engine unset so pandas picks one suited to legacy .xls files.
	            return pd.read_excel(uploaded)
	        return pd.read_csv(uploaded)
	    except Exception as e:
	        # Streamlit has no ``st.Error`` exception class, so raising it would
	        # itself fail with AttributeError and hide the real parsing problem.
	        raise ValueError(f"Error parsing file: {e}") from e

	# ── Helpers for SQL database ────────────────────────────────────────────────────
	# SQLAlchemy dialect(+driver) prefixes offered in the "Select DB engine" picker.
	SUPPORTED_ENGINES = ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"]


	@st.cache_data
	def list_tables(connection_string):
	    """Return the names of the tables reachable through a connection string.

	    Args:
	        connection_string: Database URL in SQLAlchemy format.

	    Returns:
	        A list of table names in the connection's default schema.
	    """
	    # Imported locally so this function is self-contained; the module only
	    # imports ``create_engine`` at the top of the file.
	    from sqlalchemy import inspect

	    engine = create_engine(connection_string)
	    # ``Engine.table_names()`` was deprecated in SQLAlchemy 1.4 and removed in
	    # 2.0; the inspector API is available on both.
	    return inspect(engine).get_table_names()

	@st.cache_data
	def fetch_table(connection_string, table_name):
	    """Load a whole database table into a DataFrame.

	    Args:
	        connection_string: Database URL in SQLAlchemy format.
	        table_name: Name of the table to read.

	    Returns:
	        The table contents as a ``pandas.DataFrame``.
	    """
	    db_engine = create_engine(connection_string)
	    table_df = pd.read_sql_table(table_name, con=db_engine)
	    return table_df

	# ── Streamlit page setup ────────────────────────────────────────────────────────
	# Page-level options are gathered in one mapping and applied in a single call.
	_page_options = {
	    "page_title": "BizIntel AI Ultra",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**_page_options)

	# Main heading shown at the top of the app.
	st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")

	# ── Data source selection ───────────────────────────────────────────────────────
	data_source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])

	# ``df`` stays None until one of the sources below has loaded successfully;
	# the rest of the script checks it before running any analysis.
	df = None
	if data_source == "Upload CSV / Excel":
	    uploaded = st.file_uploader(
	        "Drag & drop file here (≤ 500 MB)",
	        type=["csv", "xls", "xlsx"],
	        accept_multiple_files=False,
	    )
	    if uploaded is not None:
	        # UI boundary: report any loader failure on the page instead of
	        # letting it abort the script run with a traceback.
	        try:
	            with st.spinner("Loading file…"):
	                df = load_file(uploaded)
	        except Exception as e:
	            st.error(f"Could not load file: {e}")
	        else:
	            st.success("✅ File loaded into memory")
	elif data_source == "Connect to SQL Database":
	    engine = st.selectbox("Select DB engine", SUPPORTED_ENGINES)
	    # The chosen engine only tailors the example URL; the dialect actually
	    # used is whatever the user types into the connection string.
	    conn_str = st.text_input(
	        "Connection string (SQLAlchemy format)",
	        placeholder=f"e.g. {engine}://user:pass@host:port/dbname",
	    )
	    if conn_str:
	        try:
	            tables = list_tables(conn_str)
	        except Exception as e:
	            # Covers malformed URLs, missing drivers and unreachable hosts.
	            st.error(f"Could not list tables: {e}")
	        else:
	            table = st.selectbox("Choose table", tables)
	            if table:
	                try:
	                    with st.spinner(f"Fetching `{table}`…"):
	                        df = fetch_table(conn_str, table)
	                except Exception as e:
	                    st.error(f"Could not fetch `{table}`: {e}")
	                else:
	                    st.success(f"✅ `{table}` loaded from database")

	# ── If DataFrame is ready, show overview and proceed ───────────────────────────
	if df is not None:
	    st.markdown("### 🗂️ Preview")
	    st.dataframe(df.head(5), use_container_width=True)

	    # Dataset overview metrics
	    n_rows, n_cols = df.shape
	    n_cells = n_rows * n_cols
	    # A frame with no rows has zero cells; report 0% rather than dividing by zero.
	    missing_pct = (df.isna().sum().sum() / n_cells) * 100 if n_cells else 0.0
	    st.markdown("---")
	    c1, c2, c3 = st.columns(3)
	    c1.metric("Rows", f"{n_rows:,}")
	    c2.metric("Columns", f"{n_cols:,}")
	    c3.metric("Missing %", f"{missing_pct:.1f}%")

	    # Detailed stats
	    st.markdown("#### 📋 Detailed descriptive statistics")
	    st.dataframe(df.describe(include="all").transpose(), use_container_width=True)

	    # Numeric columns feed every chart below, so compute the list once.
	    numeric_cols = df.select_dtypes(include="number").columns.tolist()

	    # Optional exploratory visuals
	    st.markdown("---")
	    st.markdown("#### 🔎 Optional Exploratory Visuals")
	    col1, col2, col3 = st.columns(3)
	    with col1:
	        if st.checkbox("Histogram"):
	            if numeric_cols:
	                col = st.selectbox("Choose numeric column for histogram", numeric_cols, key="hist")
	                fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
	                st.plotly_chart(fig, use_container_width=True)
	            else:
	                st.info("No numeric columns available.")
	    with col2:
	        if st.checkbox("Scatter matrix"):
	            matrix_cols = numeric_cols[:6]  # limit to first 6
	            if matrix_cols:
	                fig = px.scatter_matrix(df[matrix_cols], dimensions=matrix_cols, title="Scatter Matrix")
	                st.plotly_chart(fig, use_container_width=True)
	            else:
	                st.info("No numeric columns available.")
	    with col3:
	        if st.checkbox("Correlation heatmap"):
	            if numeric_cols:
	                corr = df.select_dtypes(include="number").corr()
	                fig, ax = plt.subplots(figsize=(6, 5))
	                im = ax.imshow(corr, vmin=-1, vmax=1, cmap="RdBu")
	                # Use the Axes/Figure methods instead of pyplot's implicit
	                # "current figure", so the labels always land on this figure.
	                ax.set_xticks(range(len(corr)))
	                ax.set_xticklabels(corr.columns, rotation=45, ha="right")
	                ax.set_yticks(range(len(corr)))
	                ax.set_yticklabels(corr.columns)
	                fig.colorbar(im, ax=ax)
	                st.pyplot(fig)
	                # pyplot keeps every figure registered until it is closed;
	                # close it so reruns do not accumulate figures in memory.
	                plt.close(fig)
	            else:
	                st.info("No numeric columns available.")

	    # ── Trend & Forecast ──────────────────────────────────────────────────────
	    st.markdown("---")
	    st.markdown("### 📈 Trend & Forecast")
	    # Candidate date columns: real datetimes (naive or tz-aware) plus object
	    # columns, which may hold date strings.
	    dt_cols = df.select_dtypes(include=["datetime", "datetimetz", "object"]).columns.tolist()

	    if not dt_cols or not numeric_cols:
	        # With no options the select boxes would return None and the column
	        # lookups below would raise.
	        st.info("Trend & forecast needs at least one date/time column and one numeric column.")
	    else:
	        date_col = st.selectbox("Select date/time column", dt_cols)
	        metric_col = st.selectbox("Select numeric metric", numeric_cols)

	        # Prepare the time series on a copy so the loaded data is not altered
	        # (coercing a text column to dates would turn its values into NaT).
	        ts = df[[date_col, metric_col]].copy()
	        ts[date_col] = pd.to_datetime(ts[date_col], errors="coerce")
	        ts = ts.dropna().set_index(date_col).sort_index()
	        ts = ts[~ts.index.duplicated(keep="first")]

	        # Trend plot
	        fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time")
	        st.plotly_chart(fig_trend, use_container_width=True)

	        # Forecast the next 90 steps with ARIMA and plot them on a daily
	        # index; this matches "90 days" when the data itself is daily.
	        with st.spinner("Running 90-day forecast…"):
	            try:
	                model = ARIMA(ts, order=(1, 1, 1)).fit()
	                fcast = model.get_forecast(90)
	                idx = pd.date_range(ts.index.max(), periods=91, freq="D")[1:]
	                # predicted_mean carries its own index, which need not match
	                # ``idx`` (e.g. when the dates have no regular frequency).
	                # Passing raw values stops pandas from re-aligning the
	                # Series to ``idx`` and producing an all-NaN column.
	                df_f = pd.DataFrame({"forecast": np.asarray(fcast.predicted_mean)}, index=idx)

	                fig_fc = px.line(
	                    pd.concat([ts, df_f], axis=1),
	                    labels={metric_col: metric_col, "forecast": "Forecast"},
	                    title=f"{metric_col} & 90-Day Forecast",
	                )
	                st.plotly_chart(fig_fc, use_container_width=True)
	            except Exception as e:
	                # Model fitting can fail in many ways (too few points,
	                # non-convergence, ...); report it and keep the page usable.
	                st.error(f"Forecast failed: {e}")

	    # ── Strategy Recommendations ─────────────────────────────────────────────
	    st.markdown("---")
	    st.markdown("### 🚀 Strategy Recommendations")
	    # Single source of truth for the text that is both displayed and offered
	    # for download, so the file always matches what is shown on the page.
	    strategy_text = (
	        "1. Data Quality First\n"
	        "Address any missing or malformed dates before further time-series analysis.\n"
	        "\n"
	        "2. Trend & Seasonality\n"
	        "Investigate any upward/downward trends and repeating seasonal patterns.\n"
	        "\n"
	        "3. Outlier Management\n"
	        "Identify extreme highs/lows in your metric—could be bulk orders or data errors.\n"
	        "\n"
	        "4. Segment-Level Analysis\n"
	        "Drill into key dimensions (e.g. region, product) to tailor growth strategies.\n"
	        "\n"
	        "5. Predict & Act\n"
	        "Use your 90-day forecasts to guide inventory, staffing, and marketing decisions.\n"
	    )
	    st.markdown(strategy_text)

	    # Downloadable strategy as markdown. The earlier session-state "dummy"
	    # was dropped: it rendered a stray "…" on the page and stored an element
	    # handle that nothing ever read.
	    st.download_button(
	        "📥 Download Strategy (.md)",
	        data="# BizIntel AI Ultra – Strategy Recommendations\n\n" + strategy_text,
	        file_name="strategy.md",
	        mime="text/markdown",
	    )