BizIntel_AI / app.py
mgbam's picture
Update app.py
63a9cd3 verified
raw
history blame
10.3 kB
# app.py — BizIntel AI Ultra
# Supports: CSV/Excel/DB ingestion, date+metric plotting, ARIMA forecasting,
# safe Plotly writes into /tmp, Gemini 1.5 Pro strategy, KPI cards, optional EDA.
import os
import tempfile
import pandas as pd
import streamlit as st
import google.generativeai as genai
import plotly.graph_objects as go
# ──────────────────────────────────────────────────────────────
# 0) Monkey-patch Plotly to write images into /tmp (writable)
# ──────────────────────────────────────────────────────────────
TMP = tempfile.gettempdir()

# Streamlit re-executes this script on every rerun while the plotly module
# stays cached, so a naive patch would wrap the previous wrapper again and
# again. Recover the pristine method from the marker left by an earlier run.
_orig_write = getattr(
    go.Figure.write_image, "_bizintel_original", go.Figure.write_image
)


def _safe_write(self, path, *args, **kwargs):
    """Write the figure image under the temp directory ``TMP``.

    Only the base name of ``path`` is kept; its directory part is replaced
    by ``TMP`` so the write lands in a writable location.

    Args:
        path: Destination given by the caller. A ``str`` or ``os.PathLike``
            is redirected into ``TMP``. Anything else (e.g. a writable
            file object) is forwarded untouched, because it has no file
            name to redirect.
        *args, **kwargs: Forwarded unchanged to the original
            ``Figure.write_image``.

    Returns:
        Whatever the original ``Figure.write_image`` returns.
    """
    if not isinstance(path, (str, os.PathLike)):
        return _orig_write(self, path, *args, **kwargs)
    filename = os.path.basename(os.fspath(path))
    safe_path = os.path.join(TMP, filename)
    return _orig_write(self, safe_path, *args, **kwargs)


# Marker used above to find the unpatched method on the next script rerun.
_safe_write._bizintel_original = _orig_write
go.Figure.write_image = _safe_write
# ──────────────────────────────────────────────────────────────
# 1) Tool & DB imports
# ──────────────────────────────────────────────────────────────
from tools.csv_parser import parse_csv_tool
from tools.plot_generator import plot_metric_tool
from tools.forecaster import forecast_metric_tool
from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
# ──────────────────────────────────────────────────────────────
# 2) Gemini 1.5 Pro initialization
# ──────────────────────────────────────────────────────────────
# Hand the API key from the GEMINI_APIKEY environment variable to the SDK
# (None is passed through when the variable is unset).
genai.configure(api_key=os.environ.get("GEMINI_APIKEY"))

# Shared model handle used later to produce the strategy text.
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config=dict(
        temperature=0.7,
        top_p=0.9,
        response_mime_type="text/plain",
    ),
)
# ──────────────────────────────────────────────────────────────
# 3) Streamlit page setup
# ──────────────────────────────────────────────────────────────
# Set the browser-tab title and switch the page to the wide layout.
# NOTE(review): Streamlit expects set_page_config to run before any other
# st.* call — keep it as the first Streamlit statement in the script.
st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
# Page heading shown at the top of the app.
st.title("๐Ÿ“Š BizIntel AI Ultra โ€“ Advanced Analytics + Gemini 1.5 Pro")
# System temp directory: uploaded files are saved here and the forecast
# image is looked up here further down the script.
TEMP_DIR = tempfile.gettempdir()
# ──────────────────────────────────────────────────────────────
# 4) Data source selection: CSV/Excel or SQL Database
# ──────────────────────────────────────────────────────────────
# Resolve `csv_path`: the CSV file every later section reads from. It comes
# either from an uploaded CSV/Excel file or from a table fetched from a DB.
source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
csv_path = None

if source == "Upload CSV / Excel":
    upload = st.file_uploader("Upload CSV or Excel (โ‰ค 500 MB)", type=["csv", "xlsx", "xls"])
    if upload:
        # The file name is supplied by the client: keep only its last path
        # component so the write can never land outside TEMP_DIR.
        safe_name = os.path.basename(upload.name.replace("\\", "/"))
        tmp_file = os.path.join(TEMP_DIR, safe_name)
        with open(tmp_file, "wb") as f:
            # getvalue() returns the whole payload regardless of the current
            # stream position, unlike read() after an earlier read.
            f.write(upload.getvalue())

        if safe_name.lower().endswith(".csv"):
            csv_path = tmp_file
        else:
            # Excel: convert the first sheet to a sibling .csv file.
            try:
                df_xl = pd.read_excel(tmp_file, sheet_name=0)
                csv_path = os.path.splitext(tmp_file)[0] + ".csv"
                df_xl.to_csv(csv_path, index=False)
            except Exception as e:
                st.error(f"Excel parsing failed: {e}")
                st.stop()
        st.success(f"{upload.name} saved โœ…")
else:
    # NOTE(review): the selected engine is not used anywhere in this script;
    # the connection string alone drives the DB helpers.
    engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn = st.text_input("SQLAlchemy connection string")
    if conn:
        try:
            tables = list_tables(conn)
            tbl = st.selectbox("Table", tables)
            if st.button("Fetch table"):
                # st.button is True only for the rerun triggered by the click.
                # Remember the result in session state so the dataset survives
                # the reruns caused by every later widget interaction.
                st.session_state["db_csv"] = {
                    "conn": conn,
                    "table": tbl,
                    "path": fetch_data_from_db(conn, tbl),
                }
                st.success(f"Fetched **{tbl}** as CSV โœ…")
        except Exception as e:
            st.error(f"Connection failed: {e}")
            st.stop()
        else:
            # Reuse the remembered fetch only while it still matches the
            # current connection/table selection and the file is still there.
            fetched = st.session_state.get("db_csv")
            if (
                fetched
                and fetched["conn"] == conn
                and fetched["table"] == tbl
                and fetched["path"]
                and os.path.exists(fetched["path"])
            ):
                csv_path = fetched["path"]

# Nothing to analyse yet: end this script run here.
if not csv_path:
    st.stop()

# Download the working CSV
with open(csv_path, "rb") as f:
    st.download_button("โฌ‡๏ธ Download working CSV", f, file_name=os.path.basename(csv_path))
# ──────────────────────────────────────────────────────────────
# 5) Show head & pick date + metric (but never the same column)
# ──────────────────────────────────────────────────────────────
# Preview: load only the first five rows and display them.
df_head = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_head)

# Any column may be chosen as the date/time axis.
date_col = st.selectbox("Select date/time column", df_head.columns)

# Metric candidates are the columns that look numeric in the preview,
# minus whichever column was just chosen as the date.
numeric_cols = df_head.select_dtypes("number").columns.tolist()
metric_options = list(filter(lambda name: name != date_col, numeric_cols))

if metric_options:
    metric_col = st.selectbox("Select numeric metric column", metric_options)
else:
    st.error(f"No numeric columns available once we exclude '{date_col}'.")
    st.stop()
# ──────────────────────────────────────────────────────────────
# 6) Local analysis: summary, trend chart, forecast
# ──────────────────────────────────────────────────────────────
# Text summary of the dataset; it is fed into the Gemini prompt later on.
with st.spinner("Parsing datasetโ€ฆ"):
    summary_text = parse_csv_tool(csv_path)

with st.spinner("Generating trend chartโ€ฆ"):
    trend_fig = plot_metric_tool(csv_path, date_col, metric_col)

# The plot tool's result is either a Plotly figure or something else, which
# is shown as a warning (presumably an error message — confirm in the tool).
if isinstance(trend_fig, go.Figure):
    st.subheader("๐Ÿ“ˆ Trend")
    st.plotly_chart(trend_fig, use_container_width=True)
else:
    st.warning(trend_fig)

forecast_png = os.path.join(TEMP_DIR, "forecast_plot.png")
# The image path is fixed and lives in the shared temp directory. Remove any
# leftover from an earlier run or session first, otherwise a failed forecast
# would silently display an outdated chart as if it were the current one.
try:
    os.remove(forecast_png)
except OSError:
    # Best effort: no stale file present (or it cannot be removed), in which
    # case the existence check below behaves exactly as before.
    pass

with st.spinner("Running forecastโ€ฆ"):
    forecast_text = forecast_metric_tool(csv_path, date_col, metric_col)

st.subheader(f"๐Ÿ”ฎ {metric_col} Forecast")
# Presumably forecast_metric_tool writes forecast_plot.png as a side effect
# (redirected into the temp dir) — verify against tools/forecaster.
if os.path.exists(forecast_png):
    st.image(forecast_png, use_container_width=True)
else:
    st.warning("Forecast image not found.")
# ──────────────────────────────────────────────────────────────
# 7) Gemini-driven strategy recommendations
# ──────────────────────────────────────────────────────────────
# Prompt: dataset summary + forecast text, followed by the requested outline.
prompt = (
    "You are **BizIntel Strategist AI**.\n\n"
    f"### Dataset Summary\n```\n{summary_text}\n```\n\n"
    f"### {metric_col} Forecast\n```\n{forecast_text}\n```\n\n"
    "Return **Markdown** with:\n"
    "1. Five key insights\n"
    "2. Three actionable strategies\n"
    "3. Risk factors or anomalies\n"
    "4. Suggested additional visuals\n"
)

st.subheader("๐Ÿš€ Strategy Recommendations (Gemini 1.5 Pro)")

# Every widget interaction reruns the whole script. Reuse the previous answer
# while the prompt is unchanged instead of calling the model again.
_cached_strategy = st.session_state.get("strategy_cache")
if _cached_strategy and _cached_strategy[0] == prompt:
    strategy_md = _cached_strategy[1]
else:
    strategy_md = None
    with st.spinner("Generating insightsโ€ฆ"):
        try:
            strategy_md = gemini.generate_content(prompt).text
        except Exception as e:
            # Network/API failures and responses without text must not take
            # down the KPI and EDA sections that follow; report and carry on.
            st.error(f"Strategy generation failed: {e}")
    if strategy_md is not None:
        st.session_state["strategy_cache"] = (prompt, strategy_md)

if strategy_md:
    st.markdown(strategy_md)
    st.download_button("โฌ‡๏ธ Download Strategy (.md)", strategy_md, file_name="strategy.md")
# ──────────────────────────────────────────────────────────────
# 8) KPI cards + detailed Stats
# ──────────────────────────────────────────────────────────────
# Load the complete dataset for the overview numbers.
full_df = pd.read_csv(csv_path, low_memory=False)
total_rows, num_columns = full_df.shape
# Average of the per-column missing ratios, expressed as a percentage.
missing_pct = full_df.isna().mean().mean() * 100

st.markdown("---")
st.subheader("๐Ÿ“‘ Dataset Overview")

# One KPI card per column, left to right.
kpi_cards = (
    ("Rows", f"{total_rows:,}"),
    ("Columns", str(num_columns)),
    ("Missing %", f"{missing_pct:.1f}%"),
)
for card, (label, value) in zip(st.columns(3), kpi_cards):
    card.metric(label, value)

with st.expander("๐Ÿ”Ž Detailed descriptive statistics"):
    # describe() yields one column per feature; transpose so each feature
    # becomes a row, then expose the feature name as a regular column.
    described = full_df.describe().T
    stats_df = described.reset_index().rename(columns={"index": "Feature"})
    styled_stats = stats_df.style.format(precision=2).background_gradient(cmap="Blues")
    st.dataframe(styled_stats, use_container_width=True)
# ──────────────────────────────────────────────────────────────
# 9) Optional Exploratory Visuals
# ──────────────────────────────────────────────────────────────
st.markdown("---")
st.subheader("๐Ÿ” Optional Exploratory Visuals")

# Each visual is opt-in: its chart is built only while its box is ticked.
if st.checkbox("Histogram"):
    hcol = st.selectbox("Variable", numeric_cols, key="hist")
    hist_fig = histogram_tool(csv_path, hcol)
    st.plotly_chart(hist_fig, use_container_width=True)

if st.checkbox("Scatter Matrix"):
    # Pre-select the first three numeric columns (fewer if not available).
    preselected = numeric_cols[:3]
    sel = st.multiselect("Choose columns", numeric_cols, default=preselected)
    if sel:
        matrix_fig = scatter_matrix_tool(csv_path, sel)
        st.plotly_chart(matrix_fig, use_container_width=True)

if st.checkbox("Correlation Heatmap"):
    heatmap_fig = corr_heatmap_tool(csv_path)
    st.plotly_chart(heatmap_fig, use_container_width=True)