mgbam committed on
Commit
4778379
·
verified ·
1 Parent(s): e4b2570

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +263 -183
app.py CHANGED
@@ -1,189 +1,269 @@
1
- import streamlit as st
2
- import pandas as pd
 
 
 
 
 
 
 
 
3
  import numpy as np
4
- import tempfile
5
- from io import BytesIO
6
- from sqlalchemy import create_engine
7
- import plotly.express as px
8
- import matplotlib.pyplot as plt
9
  from statsmodels.tsa.arima.model import ARIMA
 
 
 
 
 
10
 
11
- # โ”€โ”€ Helpers to read CSV/Excel robustly โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
12
@st.cache_data
def load_file(uploaded):
    """Read an uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded: File-like object exposing a ``name`` attribute; the
            extension of ``name`` selects the parser.

    Returns:
        pandas.DataFrame with the parsed contents.

    Raises:
        ValueError: If the file cannot be parsed. The underlying error is
            chained as ``__cause__``.
    """
    try:
        name = uploaded.name.lower()
        if name.endswith(".xlsx"):
            return pd.read_excel(uploaded, engine="openpyxl")
        if name.endswith(".xls"):
            # openpyxl cannot open legacy .xls workbooks; let pandas choose
            # the engine for that format.
            return pd.read_excel(uploaded)
        return pd.read_csv(uploaded)
    except Exception as e:
        # `st.Error` does not exist, so the previous `raise st.Error(...)`
        # failed with AttributeError and hid the real parsing problem.
        raise ValueError(f"Error parsing file: {e}") from e
22
-
23
- # โ”€โ”€ Helpers for SQL database โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
24
- SUPPORTED_ENGINES = ["postgresql", "mysql", "mssql+pyodbc", "oracle+cx_oracle"]
25
@st.cache_data
def list_tables(connection_string):
    """Return the table names visible through a SQLAlchemy connection string.

    Args:
        connection_string: SQLAlchemy URL passed to ``create_engine``.

    Returns:
        list[str]: Table names reported by the database inspector.
    """
    # Imported locally so this block does not depend on the file's import
    # header; sqlalchemy itself is already a dependency (create_engine).
    from sqlalchemy import inspect

    engine = create_engine(connection_string)
    try:
        # Engine.table_names() is gone in SQLAlchemy 2.0; the inspector is
        # the supported way to list tables.
        return inspect(engine).get_table_names()
    finally:
        # The engine is only needed for this one lookup, so release its
        # connection pool instead of leaving it open.
        engine.dispose()
29
-
30
@st.cache_data
def fetch_table(connection_string, table_name):
    """Load one database table into a DataFrame.

    Args:
        connection_string: SQLAlchemy URL passed to ``create_engine``.
        table_name: Name of the table to read in full.

    Returns:
        pandas.DataFrame holding every row of ``table_name``.
    """
    engine = create_engine(connection_string)
    try:
        return pd.read_sql_table(table_name, engine)
    finally:
        # A fresh engine is created on every cache miss; dispose of it so
        # its pooled connections do not accumulate.
        engine.dispose()
34
-
35
- # โ”€โ”€ Streamlit page setup โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
36
# Page chrome: wide layout with the sidebar open, then the app title.
st.set_page_config(
    page_title="BizIntel AI Ultra",
    layout="wide",
    initial_sidebar_state="expanded",
)
# The emoji and en dash were stored as mis-decoded UTF-8; restored here.
st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
42
-
43
- # โ”€โ”€ Data source selection โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
44
# Let the user pick where the data comes from; `df` stays None until one of
# the two branches below has produced a DataFrame.
data_source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])

df = None
if data_source == "Upload CSV / Excel":
    uploaded = st.file_uploader(
        "Drag & drop file here (≤ 500 MB)",
        type=["csv", "xls", "xlsx"],
        accept_multiple_files=False,
    )
    if uploaded:
        with st.spinner("Loading file…"):
            try:
                df = load_file(uploaded)
            except Exception as e:
                # Show the failure in the page instead of aborting the
                # script with a traceback.
                st.error(f"Could not load file: {e}")
        if df is not None:
            st.success("✅ File loaded into memory")
elif data_source == "Connect to SQL Database":
    engine = st.selectbox("Select DB engine", SUPPORTED_ENGINES)
    conn_str = st.text_input(
        "Connection string (SQLAlchemy format)",
        placeholder="e.g. postgresql://user:pass@host:port/dbname",
    )
    if conn_str:
        try:
            tables = list_tables(conn_str)
            table = st.selectbox("Choose table", tables)
            if table:
                with st.spinner(f"Fetching `{table}`…"):
                    df = fetch_table(conn_str, table)
                st.success(f"✅ `{table}` loaded from database")
        except Exception as e:
            # A bad URL, missing driver or unreachable host previously
            # crashed the whole page.
            df = None
            st.error(f"Database error: {e}")
67
-
68
- # โ”€โ”€ If DataFrame is ready, show overview and proceed โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
69
# Everything below needs a loaded DataFrame: preview, overview metrics,
# optional EDA charts, trend + 90-day ARIMA forecast, and the strategy text.
if df is not None:
    st.markdown("### 🗂️ Preview")
    st.dataframe(df.head(5), use_container_width=True)

    # Dataset overview metrics
    n_rows, n_cols = df.shape
    total_cells = n_rows * n_cols
    # An empty frame has zero cells; avoid dividing by zero.
    missing_pct = (df.isna().sum().sum() / total_cells) * 100 if total_cells else 0.0
    st.markdown("---")
    c1, c2, c3 = st.columns(3)
    c1.metric("Rows", f"{n_rows:,}")
    c2.metric("Columns", f"{n_cols:,}")
    c3.metric("Missing %", f"{missing_pct:.1f}%")

    # Detailed stats
    st.markdown("#### 📋 Detailed descriptive statistics")
    st.dataframe(df.describe(include="all").transpose(), use_container_width=True)

    # Optional exploratory visuals
    st.markdown("---")
    st.markdown("#### 🔎 Optional Exploratory Visuals")
    num_cols = df.select_dtypes(include="number").columns.tolist()
    col1, col2, col3 = st.columns(3)
    with col1:
        if st.checkbox("Histogram"):
            if num_cols:
                col = st.selectbox("Choose numeric column for histogram", num_cols, key="hist")
                fig = px.histogram(df, x=col, nbins=30, title=f"Histogram of {col}")
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("No numeric columns available.")
    with col2:
        if st.checkbox("Scatter matrix"):
            matrix_cols = num_cols[:6]  # limit to first 6
            if matrix_cols:
                fig = px.scatter_matrix(df[matrix_cols], dimensions=matrix_cols, title="Scatter Matrix")
                st.plotly_chart(fig, use_container_width=True)
            else:
                st.info("No numeric columns available.")
    with col3:
        if st.checkbox("Correlation heatmap"):
            corr = df.select_dtypes(include="number").corr()
            fig, ax = plt.subplots(figsize=(6, 5))
            im = ax.imshow(corr, vmin=-1, vmax=1, cmap="RdBu")
            # Configure ticks on this axes object rather than through
            # pyplot's implicit "current figure" state.
            ax.set_xticks(range(len(corr)))
            ax.set_xticklabels(corr.columns, rotation=45, ha="right")
            ax.set_yticks(range(len(corr)))
            ax.set_yticklabels(corr.columns)
            fig.colorbar(im, ax=ax)
            st.pyplot(fig)
            # The figure was never closed before, so each render left one
            # more open matplotlib figure behind.
            plt.close(fig)

    # ── Trend & Forecast ──────────────────────────────────────────────────
    st.markdown("---")
    st.markdown("### 📈 Trend & Forecast")
    # Candidate date columns: datetime columns plus object columns, which
    # are parsed with to_datetime once the user picks one.
    dt_cols = df.select_dtypes(include=["datetime", "datetimetz", "object"]).columns.tolist()

    if not dt_cols or not num_cols:
        st.info("Trend analysis needs at least one date-like column and one numeric column.")
    else:
        # pick date/time column
        date_col = st.selectbox("Select date/time column", dt_cols)
        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")

        # pick numeric metric
        metric_col = st.selectbox("Select numeric metric", num_cols)

        # prepare time series: drop unparsable rows, sort, one row per stamp
        ts = df[[date_col, metric_col]].dropna()
        ts = ts.set_index(date_col).sort_index()
        ts = ts[~ts.index.duplicated(keep="first")]

        # Trend plot
        fig_trend = px.line(ts, y=metric_col, title=f"{metric_col} over Time")
        st.plotly_chart(fig_trend, use_container_width=True)

        # Forecast next 90 days with ARIMA
        with st.spinner("Running 90-day forecast…"):
            try:
                model = ARIMA(ts, order=(1, 1, 1)).fit()
                fcast = model.get_forecast(90)
                idx = pd.date_range(ts.index.max(), periods=91, freq="D")[1:]
                # Pass raw values: handing over the Series would align on
                # its own index and produce all-NaN whenever that index is
                # not the same daily range as `idx`.
                df_f = pd.DataFrame({"forecast": np.asarray(fcast.predicted_mean)}, index=idx)

                fig_fc = px.line(
                    pd.concat([ts, df_f], axis=1),
                    labels={metric_col: metric_col, "forecast": "Forecast"},
                    title=f"{metric_col} & 90-Day Forecast",
                )
                st.plotly_chart(fig_fc, use_container_width=True)
            except Exception as e:
                st.error(f"Forecast failed: {e}")

    # ── Strategy Recommendations ──────────────────────────────────────────
    st.markdown("---")
    st.markdown("### 🚀 Strategy Recommendations")
    recommendations = [
        ("Data Quality First",
         "Address any missing or malformed dates before further time-series analysis."),
        ("Trend & Seasonality",
         "Investigate any upward/downward trends and repeating seasonal patterns."),
        ("Outlier Management",
         "Identify extreme highs/lows in your metric—could be bulk orders or data errors."),
        ("Segment-Level Analysis",
         "Drill into key dimensions (e.g. region, product) to tailor growth strategies."),
        ("Predict & Act",
         "Use your 90-day forecasts to guide inventory, staffing, and marketing decisions."),
    ]
    strategy_body = "\n\n".join(
        f"{number}. **{title}**  \n   {text}"
        for number, (title, text) in enumerate(recommendations, start=1)
    )
    st.markdown(strategy_body)

    # downloadable strategy as markdown: the same text that is displayed
    # above, rather than "…" placeholders. The former session-state dummy
    # wrote a stray "…" into the page and stored a widget handle, so it is
    # dropped.
    st.download_button(
        "📥 Download Strategy (.md)",
        data="# BizIntel AI Ultra – Strategy Recommendations\n\n" + strategy_body + "\n",
        file_name="strategy.md",
        mime="text/markdown",
    )
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py — BizIntel AI Ultra v2
2
+ # =============================================================
3
+ # CSV / Excel / DB ingestion • Trend + ARIMA forecast (90 d or 3 steps)
4
+ # Confidence bands • Model explainability • Gemini 1.5 Pro strategy
5
+ # Safe Plotly writes -> /tmp • KPI cards • Optional EDA visuals
6
+ # =============================================================
7
+
8
+ import os, tempfile, warnings
9
+ from typing import List
10
+
11
  import numpy as np
12
+ import pandas as pd
13
+ import streamlit as st
14
+ import plotly.graph_objects as go
 
 
15
  from statsmodels.tsa.arima.model import ARIMA
16
+ from statsmodels.graphics.tsaplots import plot_acf
17
+ from statsmodels.tsa.seasonal import seasonal_decompose
18
+ from statsmodels.tools.sm_exceptions import ConvergenceWarning
19
+ import google.generativeai as genai
20
+ import matplotlib.pyplot as plt
21
 
22
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
23
+ # 0) Plotly safe write โ†’ /tmp
24
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
25
# Directory that receives every file this app writes.
TMP = tempfile.gettempdir()

# Redirect plotly's Figure.write_image so that, whatever path a caller
# passes, the image lands in TMP under the same base name.
#
# This script is re-executed while the plotly module (and therefore the
# patched class) stays loaded, so the patch is applied only once: the
# wrapper remembers the pristine method and later runs reuse it instead of
# wrapping the wrapper again.
_current_write = go.Figure.write_image
orig_write = getattr(_current_write, "_bizintel_orig", _current_write)

if not hasattr(_current_write, "_bizintel_orig"):

    def _write_image_to_tmp(self, p, *a, **k):
        """Call the original write_image with ``p`` relocated into TMP."""
        return orig_write(self, os.path.join(TMP, os.path.basename(p)), *a, **k)

    _write_image_to_tmp._bizintel_orig = orig_write
    go.Figure.write_image = _write_image_to_tmp
30
+
31
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
32
+ # 1) Local helpers & DB connector
33
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
34
+ from tools.csv_parser import parse_csv_tool
35
+ from tools.plot_generator import plot_metric_tool
36
+ from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
37
+ from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
38
+
39
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
40
+ # 2) Gemini 1.5ย Pro
41
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
42
# The API key is read from the GEMINI_APIKEY environment variable.
_gemini_api_key = os.getenv("GEMINI_APIKEY")
genai.configure(api_key=_gemini_api_key)

# Sampling settings for the strategy text; plain-text responses requested.
_gemini_generation_config = {
    "temperature": 0.7,
    "top_p": 0.9,
    "response_mime_type": "text/plain",
}
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config=_gemini_generation_config,
)
47
+
48
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
49
+ # 3) Streamlit setup
50
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
51
# Page title and heading. The non-breaking spaces (\u00a0), emoji and en
# dash were stored as mis-decoded UTF-8; they are restored here, with the
# invisible characters written as escapes so they survive editing.
st.set_page_config(page_title="BizIntel\u00a0AI\u00a0Ultra", layout="wide")
st.title("📊\u00a0BizIntel\u00a0AI\u00a0Ultra\u00a0– Advanced Analytics\u00a0+\u00a0Gemini\u00a01.5\u00a0Pro")
53
+
54
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
55
+ # 4) Data source
56
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
57
# Resolve the user's data source to a CSV file on disk (`csv_path`).
# Uploads are copied into TMP (Excel is converted to CSV); database tables
# are fetched through db_connector.
choice = st.radio("Select data source", ["Upload CSV\u00a0/\u00a0Excel", "Connect to SQL Database"])
csv_path: str | None = None

if choice.startswith("Upload"):
    up = st.file_uploader("CSV\u00a0or\u00a0Excel\u00a0(≤\u202f500\u202fMB)", type=["csv", "xlsx", "xls"])
    if up:
        # Keep only the base name so the copy always stays inside TMP.
        tmp = os.path.join(TMP, os.path.basename(up.name))
        with open(tmp, "wb") as f:
            # getvalue() returns the whole buffer regardless of the current
            # read position, unlike read().
            f.write(up.getvalue())
        if up.name.lower().endswith(".csv"):
            csv_path = tmp
        else:
            try:
                pd.read_excel(tmp, sheet_name=0).to_csv(tmp + ".csv", index=False)
                csv_path = tmp + ".csv"
            except Exception as e:
                st.error(f"Excel parse failed: {e}")
else:
    eng = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn = st.text_input("SQLAlchemy\u00a0connection string")
    if conn:
        try:
            tbl = st.selectbox("Table", list_tables(conn))
            if st.button("Fetch table"):
                # st.button is True only on the run triggered by the click.
                # Remember the result in session state so the fetched table
                # is still there when another widget causes a rerun.
                st.session_state["db_fetch"] = (conn, tbl, fetch_data_from_db(conn, tbl))
                st.success(f"Fetched **{tbl}**")
            fetched = st.session_state.get("db_fetch")
            # Reuse the stored file only for the same connection and table.
            if fetched and fetched[:2] == (conn, tbl):
                csv_path = fetched[2]
        except Exception as e:
            st.error(f"DB error: {e}")

# Nothing to analyse yet: end this script run here.
if not csv_path:
    st.stop()

with open(csv_path, "rb") as f:
    st.download_button("⬇️\u00a0Download working CSV", f, file_name=os.path.basename(csv_path))
90
+
91
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
92
+ # 5) Column selectors
93
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
94
# Read a five-row sample: it supplies the column list for the selectors
# and tells which columns pandas parses as numeric.
df_head = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_head)

date_col = st.selectbox("Date/time column", df_head.columns)

# The metric may be any numeric column other than the chosen date column.
numeric_cols = df_head.select_dtypes("number").columns.tolist()
metric_options = list(filter(lambda name: name != date_col, numeric_cols))

if len(metric_options) == 0:
    st.error("No numeric columns available apart from the date column.")
    st.stop()

metric_col = st.selectbox("Numeric metric column", metric_options)
104
+
105
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
106
+ # 6) Summary & trend chart
107
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
108
# Dataset summary (used later in the Gemini prompt) and the trend chart.
summary = parse_csv_tool(csv_path)
trend_fig = plot_metric_tool(csv_path, date_col, metric_col)
if isinstance(trend_fig, go.Figure):
    # Emoji and non-breaking space restored from mis-decoded UTF-8.
    st.subheader("📈\u00a0Trend")
    st.plotly_chart(trend_fig, use_container_width=True)
else:
    # Anything that is not a Figure is shown to the user as a warning.
    st.warning(trend_fig)
115
+
116
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
117
+ # 7) Robust ARIMA + explainability
118
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
119
def build_series(path, dcol, vcol):
    """Build a regularly spaced numeric time series from two CSV columns.

    Rows whose date or value cannot be parsed are dropped, values sharing a
    timestamp are averaged, the series is put on a regular frequency and
    the resulting gaps are filled by interpolation.

    Args:
        path: CSV file to read.
        dcol: Name of the date/time column.
        vcol: Name of the numeric value column.

    Returns:
        tuple: ``(series, freq)`` where ``series`` is a pandas Series
        indexed by timestamp and ``freq`` is the frequency string used
        (``"D"`` when none can be inferred).

    Raises:
        ValueError: If fewer than two distinct valid timestamps remain.
    """
    df = pd.read_csv(path, usecols=[dcol, vcol])
    df[dcol] = pd.to_datetime(df[dcol], errors="coerce")
    df[vcol] = pd.to_numeric(df[vcol], errors="coerce")
    df = df.dropna(subset=[dcol, vcol]).sort_values(dcol)
    if df.empty or df[dcol].nunique() < 2:
        raise ValueError("Need ≥\u202f2 valid timestamps.")
    s = df.set_index(dcol)[vcol].groupby(level=0).mean().sort_index()
    # infer_freq raises when given fewer than three timestamps, but the
    # guard above accepts two; treat "cannot infer" like "nothing inferred"
    # and fall back to daily.
    try:
        freq = pd.infer_freq(s.index)
    except (TypeError, ValueError):
        freq = None
    freq = freq or "D"
    s = s.asfreq(freq).interpolate()
    return s, freq
130
+
131
@st.cache_data(show_spinner="Fitting ARIMA…")
def fit_arima(series):
    """Fit an ARIMA(1, 1, 1) model to ``series`` and return the fit result.

    Args:
        series: Time series passed to ``ARIMA``.

    Returns:
        The object returned by ``ARIMA(...).fit()``.
    """
    # Silence ConvergenceWarning for this fit only. A bare simplefilter()
    # call would change the warning filters for the rest of the process.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ConvergenceWarning)
        model = ARIMA(series, order=(1, 1, 1))
        return model.fit()
136
+
137
# Build the series, fit ARIMA and forecast. Any failure is reported in the
# page and leaves `forecast` as None so the sections below are skipped.
try:
    series, freq = build_series(csv_path, date_col, metric_col)
    # 90 steps for daily data, 3 steps for every other frequency.
    horizon = 90 if freq == "D" else 3
    res = fit_arima(series)
    fc = res.get_forecast(steps=horizon)
    forecast = fc.predicted_mean
    ci = fc.conf_int()
except Exception as e:
    st.subheader(f"🔮\u00a0{metric_col}\u00a0Forecast")
    st.warning(f"Forecast failed: {e}")
    series = forecast = ci = None

if forecast is not None:
    # Plot with CI: history, forecast, and a filled band between the two
    # confidence-interval columns.
    fig = go.Figure()
    fig.add_scatter(x=series.index, y=series, mode="lines", name=metric_col)
    fig.add_scatter(x=forecast.index, y=forecast, mode="lines+markers", name="Forecast")
    fig.add_scatter(x=ci.index, y=ci.iloc[:, 1], mode="lines",
                    line=dict(width=0), showlegend=False)
    fig.add_scatter(x=ci.index, y=ci.iloc[:, 0], mode="lines",
                    line=dict(width=0), fill="tonexty",
                    fillcolor="rgba(255,0,0,0.25)", showlegend=False)
    fig.update_layout(title=f"{metric_col} Forecast ({horizon}\u00a0steps)",
                      template="plotly_dark", xaxis_title=date_col,
                      yaxis_title=metric_col)
    st.subheader(f"🔮\u00a0{metric_col}\u00a0Forecast")
    st.plotly_chart(fig, use_container_width=True)

    # ---------------- summary & interpretation ----------------
    st.subheader("📄\u00a0Model Summary")
    st.code(res.summary().as_text(), language="text")

    st.subheader("🗒\u00a0Coefficient Interpretation")
    ar = res.arparams
    ma = res.maparams
    interp: List[str] = []
    if ar.size:
        interp.append(f"•\u00a0AR(1)\u00a0={ar[0]:.2f} → "
                      f"{'strong' if abs(ar[0]) > 0.5 else 'moderate'} "
                      "persistence in the series.")
    if ma.size:
        interp.append(f"•\u00a0MA(1)\u00a0={ma[0]:.2f} → "
                      f"{'large' if abs(ma[0]) > 0.5 else 'modest'} "
                      "shock adjustment.")
    # A blank line between bullets keeps them as separate paragraphs;
    # markdown folds single newlines into one line.
    st.markdown("\n\n".join(interp) or "N/A")

    # ---------------- Residual ACF ----------------
    st.subheader("🔍\u00a0Residual Autocorrelation (ACF)")
    resid = res.resid.dropna()
    # Never request more lags than the residual series can provide.
    acf_lags = min(30, len(resid) - 1)
    if acf_lags >= 1:
        # Draw on an explicit axes: without `ax`, plot_acf opens its own
        # figure, which left the pre-sized figure unused and never closed.
        acf_fig, acf_ax = plt.subplots(figsize=(6, 3))
        plot_acf(resid, lags=acf_lags, alpha=0.05, ax=acf_ax)
        acf_fig.tight_layout()
        st.pyplot(acf_fig)
        plt.close(acf_fig)
    else:
        st.info("Not enough residuals for an autocorrelation plot.")

    # ---------------- Back‑test ----------------
    st.subheader("🧪\u00a0Back\u2011test (last 20\u202f%)")
    k = max(int(len(series) * 0.2), 10)
    if len(series) - k < 10:
        # Holding out k points would leave too little data to train on.
        st.info("Series too short for a hold-out back-test.")
    else:
        train, test = series[:-k], series[-k:]
        try:
            bt_res = ARIMA(train, order=(1, 1, 1)).fit()
            bt_pred = bt_res.forecast(k)
            abs_err = (bt_pred - test).abs()
            # Percentage error is undefined where the actual value is 0,
            # and must be relative to the magnitude of the actual value.
            nonzero = test != 0
            mape = (abs_err[nonzero] / test[nonzero].abs()).mean() * 100
            rmse = np.sqrt(((bt_pred - test) ** 2).mean())
        except Exception as e:
            st.info(f"Back-test failed: {e}")
        else:
            colA, colB = st.columns(2)
            colA.metric("MAPE", f"{mape:.2f}\u00a0%" if np.isfinite(mape) else "N/A")
            colB.metric("RMSE", f"{rmse:,.0f}")

    # ---------------- Optional seasonal decomposition -------
    with st.expander("Seasonal Decomposition"):
        try:
            # Seasonal period per frequency alias; "h", "MS" and "ME" are
            # the hourly / month-start / month-end spellings of the same
            # hourly and monthly cases.
            period = {"D": 7, "H": 24, "h": 24,
                      "M": 12, "MS": 12, "ME": 12}.get(freq, None)
            if period:
                dec = seasonal_decompose(series, period=period, model="additive")
                for comp in ["trend", "seasonal", "resid"]:
                    st.line_chart(getattr(dec, comp), height=150)
            else:
                st.info("Frequency not suited for decomposition.")
        except Exception as e:
            st.info(f"Decomposition failed: {e}")
218
+
219
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
220
+ # 8) Gemini strategy report
221
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
222
# Ask Gemini for a Markdown strategy report based on the dataset summary
# and the forecast (or "N/A" when no forecast is available).
prompt = (
    "You are **BizIntel Strategist AI**.\n\n"
    f"### Dataset Summary\n```\n{summary}\n```\n\n"
    f"### {metric_col} Forecast\n```\n"
    f"{forecast.to_string() if forecast is not None else 'N/A'}\n```\n\n"
    "Craft a Markdown report:\n"
    "1. Five insights\n2. Three actionable strategies\n"
    "3. Risks\u00a0/ anomalies\n4. Extra visuals to consider."
)

# Keep the last answer in session state, keyed by the prompt, so that a
# rerun with unchanged inputs reuses it instead of calling the API again.
_strategy_cache = st.session_state.get("strategy_cache")
if _strategy_cache and _strategy_cache[0] == prompt:
    md = _strategy_cache[1]
else:
    md = None
    with st.spinner("Gemini generating strategy…"):
        try:
            md = gemini.generate_content(prompt).text
            st.session_state["strategy_cache"] = (prompt, md)
        except Exception as e:
            # A missing key, network failure or an empty response used to
            # abort the page before the overview sections were drawn.
            st.error(f"Strategy generation failed: {e}")

st.subheader("🚀\u00a0Strategy\u00a0Recommendations\u00a0(Gemini\u00a01.5\u00a0Pro)")
if md is not None:
    st.markdown(md)
    st.download_button("⬇️\u00a0Download\u00a0Strategy (.md)", md, file_name="strategy.md")
236
+
237
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
238
+ # 9) KPI cards + detailed stats + optional EDA (unchanged)
239
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
240
# Load the full working CSV for the overview cards, the descriptive
# statistics table and the optional exploratory charts.
fulldf = pd.read_csv(csv_path, low_memory=False)
rows, cols = fulldf.shape
# The mean of per-column missing ratios is NaN for a frame with no cells;
# report 0 % in that case.
miss_pct = fulldf.isna().mean().mean() * 100 if fulldf.size else 0.0
num_cols = fulldf.select_dtypes("number").columns.tolist()

st.markdown("---")
st.subheader("📑\u00a0Dataset\u00a0Overview")
c1, c2, c3 = st.columns(3)
c1.metric("Rows", f"{rows:,}")
c2.metric("Columns", cols)
c3.metric("Missing\u00a0%", f"{miss_pct:.1f}%")

with st.expander("Descriptive\u00a0Statistics"):
    if num_cols:
        # Restrict to numeric columns: the colour gradient cannot be
        # applied to the text statistics describe() returns otherwise.
        stats = fulldf[num_cols].describe().T
        st.dataframe(stats.style.format(precision=2).background_gradient("Blues"),
                     use_container_width=True)
    else:
        st.info("No numeric columns to describe.")

st.markdown("---")
st.subheader("🔍\u00a0Optional\u00a0Exploratory\u00a0Visuals")

if st.checkbox("Histogram"):
    if num_cols:
        hist_var = st.selectbox("Var", num_cols, key="hist")
        st.plotly_chart(histogram_tool(csv_path, hist_var), use_container_width=True)
    else:
        st.info("No numeric columns available.")

if st.checkbox("Scatter\u00a0Matrix"):
    sel = st.multiselect("Columns", num_cols, default=num_cols[:3])
    if sel:
        st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)

if st.checkbox("Correlation\u00a0Heat\u2011map"):
    st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)