File size: 10,266 Bytes
49d873b
092c2a9
63a9cd3
e490e03
309eec4
 
773f0cf
2c362d2
309eec4
10c7dea
3acbc9c
309eec4
221fe0a
63a9cd3
221fe0a
092c2a9
63a9cd3
221fe0a
092c2a9
 
63a9cd3
221fe0a
 
 
63a9cd3
221fe0a
49d873b
63a9cd3
 
49d873b
 
10c7dea
e490e03
092c2a9
e490e03
10c7dea
2c362d2
b3a1f0c
221fe0a
 
 
 
 
2c362d2
67e3963
e490e03
092c2a9
e490e03
221fe0a
 
72a73ff
 
e490e03
092c2a9
e490e03
221fe0a
092c2a9
f0be302
221fe0a
092c2a9
49d873b
092c2a9
 
49d873b
773f0cf
49d873b
092c2a9
221fe0a
f0be302
092c2a9
63a9cd3
9414ba2
f0be302
 
 
49d873b
221fe0a
092c2a9
773f0cf
9414ba2
c7c64f3
773f0cf
221fe0a
092c2a9
c7c64f3
092c2a9
 
773f0cf
 
 
 
092c2a9
309eec4
 
092c2a9
773f0cf
221fe0a
10c7dea
773f0cf
63a9cd3
773f0cf
9414ba2
 
 
63a9cd3
 
 
 
49d873b
63a9cd3
 
 
 
 
10c7dea
e490e03
092c2a9
e490e03
f0be302
2c362d2
10c7dea
49d873b
9414ba2
 
092c2a9
9414ba2
773f0cf
9414ba2
10c7dea
092c2a9
9414ba2
e490e03
221fe0a
49d873b
 
 
 
 
773f0cf
e490e03
092c2a9
e490e03
523228c
 
9414ba2
 
221fe0a
 
 
e490e03
 
523228c
 
221fe0a
773f0cf
3acbc9c
d826a13
221fe0a
c7c64f3
 
092c2a9
c7c64f3
63a9cd3
 
 
c7c64f3
 
f0be302
 
092c2a9
63a9cd3
 
092c2a9
523228c
221fe0a
63a9cd3
 
 
 
 
 
 
 
 
 
773f0cf
e490e03
092c2a9
e490e03
523228c
3acbc9c
 
773f0cf
092c2a9
 
d826a13
63a9cd3
9414ba2
d037161
 
3acbc9c
63a9cd3
773f0cf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# app.py  —  BizIntel AI Ultra
# Supports: CSV/Excel/DB ingestion, date+metric plotting, ARIMA forecasting,
# safe Plotly writes into /tmp, Gemini 1.5 Pro strategy, KPI cards, optional EDA.

import os
import tempfile

import pandas as pd
import streamlit as st
import google.generativeai as genai
import plotly.graph_objects as go

# ──────────────────────────────────────────────────────────────
# 0) Monkey-patch Plotly to write images into /tmp (writable)
# ──────────────────────────────────────────────────────────────
TMP = tempfile.gettempdir()

# Streamlit re-executes this script on every interaction while the plotly
# module stays imported, so ``go.Figure.write_image`` may already be a wrapper
# installed by a previous run. Always wrap the genuine original, otherwise the
# wrappers nest one level deeper on each rerun.
_orig_write = getattr(go.Figure.write_image, "_unpatched", go.Figure.write_image)


def _safe_write(self, path, *args, **kwargs):
    """Write the figure image into the temp directory instead of ``path``'s directory.

    Only the base name of ``path`` is kept; its directory part is replaced by
    ``TMP``. A target that is not a filesystem path (for example an open
    binary buffer) is forwarded unchanged, since it has no file name to
    redirect. Extra positional and keyword arguments are passed straight
    through, and the original method's return value is returned.
    """
    if not isinstance(path, (str, os.PathLike)):
        return _orig_write(self, path, *args, **kwargs)
    safe_path = os.path.join(TMP, os.path.basename(path))
    return _orig_write(self, safe_path, *args, **kwargs)


# Remember the real implementation on the wrapper so later reruns can find it.
_safe_write._unpatched = _orig_write
go.Figure.write_image = _safe_write

# ──────────────────────────────────────────────────────────────
# 1) Tool & DB imports
# ──────────────────────────────────────────────────────────────
from tools.csv_parser      import parse_csv_tool
from tools.plot_generator  import plot_metric_tool
from tools.forecaster      import forecast_metric_tool
from tools.visuals         import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
from db_connector          import fetch_data_from_db, list_tables, SUPPORTED_ENGINES

# ──────────────────────────────────────────────────────────────
# 2) Gemini 1.5 Pro initialization
# ──────────────────────────────────────────────────────────────
# The API key comes from the GEMINI_APIKEY environment variable (None if unset).
genai.configure(api_key=os.environ.get("GEMINI_APIKEY"))

# One shared model handle, configured for plain-text responses.
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config=dict(
        temperature=0.7,
        top_p=0.9,
        response_mime_type="text/plain",
    ),
)

# ──────────────────────────────────────────────────────────────
# 3) Streamlit page setup
# ──────────────────────────────────────────────────────────────
st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
# Title text restored from mis-decoded UTF-8 (chart emoji and en dash).
st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
# Directory where uploads, converted CSVs and generated images are stored.
TEMP_DIR = tempfile.gettempdir()

# ──────────────────────────────────────────────────────────────
# 4) Data source selection: CSV/Excel or SQL Database
# ──────────────────────────────────────────────────────────────
# Outcome of this section: ``csv_path`` points at a CSV file on disk, or the
# script stops here until the user has provided data.
source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
csv_path = None

if source == "Upload CSV / Excel":
    upload = st.file_uploader("Upload CSV or Excel (≤ 500 MB)", type=["csv", "xlsx", "xls"])
    if upload:
        # Keep only the base name so the upload's name cannot point outside TEMP_DIR.
        upload_name = os.path.basename(upload.name)
        tmp_file = os.path.join(TEMP_DIR, upload_name)
        with open(tmp_file, "wb") as f:
            # getvalue() returns the full buffer regardless of the current read
            # position, so a rerun never writes a truncated or empty file.
            f.write(upload.getvalue())

        if upload_name.lower().endswith(".csv"):
            csv_path = tmp_file
        else:
            # Excel input: convert the first sheet to a CSV next to the upload.
            try:
                df_xl = pd.read_excel(tmp_file, sheet_name=0)
                csv_path = tmp_file.rsplit(".", 1)[0] + ".csv"
                df_xl.to_csv(csv_path, index=False)
            except Exception as e:
                st.error(f"Excel parsing failed: {e}")
                st.stop()
        st.success(f"{upload.name} saved ✅")

else:
    engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn   = st.text_input("SQLAlchemy connection string")
    if conn:
        try:
            tables = list_tables(conn)
            tbl    = st.selectbox("Table", tables)
            if st.button("Fetch table"):
                # st.button is True only on the run right after the click, so
                # the fetched path is kept in session state for later reruns
                # (every widget interaction below triggers one).
                st.session_state["db_fetch"] = (conn, tbl, fetch_data_from_db(conn, tbl))
                st.success(f"Fetched **{tbl}** as CSV ✅")
            # Reuse the stored fetch only while the connection and table match.
            fetched = st.session_state.get("db_fetch")
            if fetched is not None and fetched[:2] == (conn, tbl):
                csv_path = fetched[2]
        except Exception as e:
            st.error(f"Connection failed: {e}")
            st.stop()

# Nothing to analyse yet: wait for the user to supply data.
if not csv_path:
    st.stop()

# Download the working CSV
with open(csv_path, "rb") as f:
    st.download_button("⬇️ Download working CSV", f, file_name=os.path.basename(csv_path))

# ──────────────────────────────────────────────────────────────
# 5) Show head & pick date + metric (but never the same column)
# ──────────────────────────────────────────────────────────────
# Only the first five rows are read here; they drive the preview and the
# column pickers below.
df_head = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_head)

# a) Any column may be chosen as the date/time axis.
date_col = st.selectbox("Select date/time column", df_head.columns)

# b) The metric must be numeric and must differ from the chosen date column.
numeric_cols = df_head.select_dtypes(include="number").columns.tolist()
metric_options = [name for name in numeric_cols if name != date_col]
if not metric_options:
    st.error(f"No numeric columns available once we exclude '{date_col}'.")
    st.stop()

metric_col = st.selectbox("Select numeric metric column", metric_options)

# ──────────────────────────────────────────────────────────────
# 6) Local analysis: summary, trend chart, forecast
# ──────────────────────────────────────────────────────────────
# UI strings below were restored from mis-decoded UTF-8 (ellipsis and emoji).
with st.spinner("Parsing dataset…"):
    summary_text = parse_csv_tool(csv_path)

with st.spinner("Generating trend chart…"):
    trend_fig = plot_metric_tool(csv_path, date_col, metric_col)
# Anything other than a Plotly figure is shown as a warning message.
if isinstance(trend_fig, go.Figure):
    st.subheader("📈 Trend")
    st.plotly_chart(trend_fig, use_container_width=True)
else:
    st.warning(trend_fig)

with st.spinner("Running forecast…"):
    forecast_text = forecast_metric_tool(csv_path, date_col, metric_col)

st.subheader(f"🔮 {metric_col} Forecast")
# NOTE(review): presumably forecast_metric_tool writes this image; if it fails
# to, a file left by an earlier run would be displayed — confirm against the tool.
forecast_png = os.path.join(TEMP_DIR, "forecast_plot.png")
if os.path.exists(forecast_png):
    st.image(forecast_png, use_container_width=True)
else:
    st.warning("Forecast image not found.")

# ──────────────────────────────────────────────────────────────
# 7) Gemini-driven strategy recommendations
# ──────────────────────────────────────────────────────────────
prompt = (
    "You are **BizIntel Strategist AI**.\n\n"
    f"### Dataset Summary\n```\n{summary_text}\n```\n\n"
    f"### {metric_col} Forecast\n```\n{forecast_text}\n```\n\n"
    "Return **Markdown** with:\n"
    "1. Five key insights\n"
    "2. Three actionable strategies\n"
    "3. Risk factors or anomalies\n"
    "4. Suggested additional visuals\n"
)

st.subheader("🚀 Strategy Recommendations (Gemini 1.5 Pro)")

# The script reruns on every widget interaction; reuse the last answer while
# the prompt is unchanged instead of calling the model again each time.
cached_strategy = st.session_state.get("strategy_cache")
if cached_strategy is not None and cached_strategy[0] == prompt:
    strategy_md = cached_strategy[1]
else:
    strategy_md = None
    with st.spinner("Generating insights…"):
        try:
            strategy_md = gemini.generate_content(prompt).text
        except Exception as e:
            # Report the failure but keep rendering the sections below, which
            # do not depend on the model output.
            st.error(f"Strategy generation failed: {e}")
        else:
            st.session_state["strategy_cache"] = (prompt, strategy_md)

if strategy_md is not None:
    st.markdown(strategy_md)
    st.download_button("⬇️ Download Strategy (.md)", strategy_md, file_name="strategy.md")

# ──────────────────────────────────────────────────────────────
# 8) KPI cards + detailed Stats
# ──────────────────────────────────────────────────────────────
# The whole CSV is loaded here (earlier sections only read a five-row sample).
full_df     = pd.read_csv(csv_path, low_memory=False)
total_rows  = len(full_df)
num_columns = full_df.shape[1]
# Mean of the per-column missing fractions, expressed as a percentage.
missing_pct = full_df.isna().mean().mean() * 100

st.markdown("---")
# Labels below were restored from mis-decoded UTF-8 emoji.
st.subheader("📑 Dataset Overview")
c1, c2, c3 = st.columns(3)
c1.metric("Rows",      f"{total_rows:,}")
c2.metric("Columns",   str(num_columns))
c3.metric("Missing %", f"{missing_pct:.1f}%")

with st.expander("🔎 Detailed descriptive statistics"):
    # One row per described column, with its name exposed as "Feature".
    stats_df = (
        full_df.describe()
        .T
        .reset_index()
        .rename(columns={"index": "Feature"})
    )
    st.dataframe(
        stats_df.style.format(precision=2).background_gradient(cmap="Blues"),
        use_container_width=True,
    )

# ──────────────────────────────────────────────────────────────
# 9) Optional Exploratory Visuals
# ──────────────────────────────────────────────────────────────
st.markdown("---")
# NOTE(review): the original emoji was mis-decoded with one byte lost; a
# magnifying glass is the most plausible reconstruction — confirm.
st.subheader("🔍 Optional Exploratory Visuals")

# Each visual is rendered only while its checkbox is ticked.
if st.checkbox("Histogram"):
    hcol = st.selectbox("Variable", numeric_cols, key="hist")
    st.plotly_chart(histogram_tool(csv_path, hcol), use_container_width=True)

if st.checkbox("Scatter Matrix"):
    # Pre-select up to the first three numeric columns.
    sel = st.multiselect("Choose columns", numeric_cols, default=numeric_cols[:3])
    if sel:
        st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)

if st.checkbox("Correlation Heatmap"):
    st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)