File size: 10,128 Bytes
49d873b
 
 
e490e03
309eec4
 
f0be302
773f0cf
2c362d2
309eec4
10c7dea
3acbc9c
309eec4
221fe0a
49d873b
221fe0a
 
49d873b
221fe0a
 
 
 
49d873b
221fe0a
 
 
 
49d873b
221fe0a
49d873b
 
 
 
 
10c7dea
e490e03
49d873b
e490e03
10c7dea
2c362d2
b3a1f0c
221fe0a
 
 
 
 
2c362d2
67e3963
e490e03
49d873b
e490e03
221fe0a
 
67e3963
72a73ff
 
e490e03
49d873b
e490e03
221fe0a
f0be302
 
221fe0a
49d873b
 
 
f0be302
49d873b
773f0cf
49d873b
221fe0a
 
f0be302
9414ba2
f0be302
9414ba2
f0be302
 
 
49d873b
221fe0a
49d873b
773f0cf
9414ba2
c7c64f3
773f0cf
221fe0a
 
c7c64f3
221fe0a
 
773f0cf
 
 
 
 
309eec4
 
49d873b
773f0cf
221fe0a
10c7dea
773f0cf
49d873b
773f0cf
9414ba2
 
 
221fe0a
49d873b
221fe0a
10c7dea
e490e03
221fe0a
e490e03
f0be302
2c362d2
10c7dea
49d873b
9414ba2
 
 
773f0cf
9414ba2
10c7dea
221fe0a
9414ba2
e490e03
49d873b
221fe0a
49d873b
 
 
 
 
773f0cf
e490e03
49d873b
e490e03
523228c
 
9414ba2
 
221fe0a
 
 
e490e03
 
523228c
 
221fe0a
773f0cf
3acbc9c
d826a13
221fe0a
c7c64f3
 
49d873b
c7c64f3
9414ba2
c7c64f3
d037161
c7c64f3
 
f0be302
 
49d873b
 
 
 
523228c
221fe0a
c7c64f3
 
 
 
 
773f0cf
e490e03
c7c64f3
e490e03
523228c
3acbc9c
 
773f0cf
49d873b
 
d826a13
221fe0a
9414ba2
d037161
 
3acbc9c
221fe0a
773f0cf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# app.py  โ€”  BizIntel AI Ultra  
# Supports: CSV/Excel/DB ingestion, any numeric metric + date, interactive Plotly, Gemini 1.5 Pro,
# KPI cards, optional EDA, safe image writes, and updated use_container_width flag.

import os
import tempfile
from typing import Literal

import pandas as pd
import streamlit as st
import google.generativeai as genai
import plotly.graph_objects as go

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# REDIRECT ALL write_image CALLS TO A WRITABLE TEMP DIRECTORY
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
_tmp = tempfile.gettempdir()
_original_write = go.Figure.write_image

def _safe_write(self, path, *args, **kwargs):
    fname = os.path.basename(path)
    safe_path = os.path.join(_tmp, fname)
    return _original_write(self, safe_path, *args, **kwargs)

go.Figure.write_image = _safe_write

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# TOOL IMPORTS (updated generic versions)
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
from tools.csv_parser      import parse_csv_tool
from tools.plot_generator  import plot_metric_tool      # date_col, metric_col
from tools.forecaster      import forecast_metric_tool  # date_col, metric_col
from tools.visuals         import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
from db_connector          import fetch_data_from_db, list_tables, SUPPORTED_ENGINES

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 1.  GEMINI 1.5-PRO INITIALIZATION
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config={
        "temperature": 0.7,
        "top_p": 0.9,
        "response_mime_type": "text/plain",
    },
)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 2.  STREAMLIT PAGE CONFIG
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.set_page_config(page_title="BizIntel AI Ultra", layout="wide")
st.title("๐Ÿ“Š BizIntel AI Ultra โ€“ Advanced Analytics + Gemini 1.5 Pro")

TEMP_DIR = tempfile.gettempdir()

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 3.  DATA SOURCE: CSV / EXCEL / SQL DATABASE
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
csv_path: str | None = None

if source == "Upload CSV / Excel":
    upload = st.file_uploader("Upload CSV or Excel (โ‰ค 500 MB)", type=["csv", "xlsx", "xls"])
    if upload:
        temp_path = os.path.join(TEMP_DIR, upload.name)
        with open(temp_path, "wb") as f:
            f.write(upload.read())

        if upload.name.lower().endswith(".csv"):
            csv_path = temp_path
        else:
            try:
                df_xl = pd.read_excel(temp_path, sheet_name=0)
                csv_path = os.path.splitext(temp_path)[0] + ".csv"
                df_xl.to_csv(csv_path, index=False)
            except Exception as e:
                st.error(f"Excel parsing failed: {e}")
                st.stop()
        st.success(f"{upload.name} saved โœ…")

else:  # SQL Database path
    engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn   = st.text_input("SQLAlchemy connection string")
    if conn:
        try:
            tables = list_tables(conn)
            table  = st.selectbox("Table", tables)
            if st.button("Fetch table"):
                csv_path = fetch_data_from_db(conn, table)
                st.success(f"Fetched **{table}** as CSV โœ…")
        except Exception as e:
            st.error(f"Connection failed: {e}")
            st.stop()

if csv_path is None:
    st.stop()

# Offer download of the working CSV
with open(csv_path, "rb") as f:
    st.download_button("โฌ‡๏ธ Download working CSV", f, file_name=os.path.basename(csv_path))

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 4.  COLUMN SELECTION: DATE + METRIC
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
df_head = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_head)

date_col    = st.selectbox("Select date/time column", df_head.columns)
numeric_cols = df_head.select_dtypes("number").columns.tolist()
metric_col  = st.selectbox("Select numeric metric column", numeric_cols)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 5.  LOCAL ANALYTICS: SUMMARY, TREND, FORECAST
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with st.spinner("Parsing datasetโ€ฆ"):
    summary_text = parse_csv_tool(csv_path)

with st.spinner("Generating trend chartโ€ฆ"):
    trend_fig = plot_metric_tool(csv_path, date_col, metric_col)
if isinstance(trend_fig, go.Figure):
    st.plotly_chart(trend_fig, use_container_width=True)
else:
    st.warning(trend_fig)

with st.spinner("Forecastingโ€ฆ"):
    forecast_text = forecast_metric_tool(csv_path, date_col, metric_col)

# Display the forecast image saved under /tmp
st.subheader(f"๐Ÿ”ฎ {metric_col} Forecast")
forecast_png = os.path.join(TEMP_DIR, "forecast_plot.png")
if os.path.exists(forecast_png):
    st.image(forecast_png, use_container_width=True)
else:
    st.warning("Forecast image not found.")

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 6.  GEMINI STRATEGY RECOMMENDATIONS
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
prompt = (
    f"You are **BizIntel Strategist AI**.\n\n"
    f"### Dataset Summary\n```\n{summary_text}\n```\n\n"
    f"### {metric_col} Forecast\n```\n{forecast_text}\n```\n\n"
    "Return **Markdown** with:\n"
    "1. Five key insights\n"
    "2. Three actionable strategies\n"
    "3. Risk factors or anomalies\n"
    "4. Suggested additional visuals\n"
)

st.subheader("๐Ÿš€ Strategy Recommendations (Gemini 1.5 Pro)")
with st.spinner("Generating insightsโ€ฆ"):
    strategy_md = gemini.generate_content(prompt).text
st.markdown(strategy_md)
st.download_button("โฌ‡๏ธ Download Strategy (.md)", strategy_md, file_name="strategy.md")

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 7.  KPI CARDS + DETAILED STATS EXPANDER
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
full_df    = pd.read_csv(csv_path, low_memory=False)
total_rows = len(full_df)
num_cols   = len(full_df.columns)
missing_pct = full_df.isna().mean().mean() * 100

st.markdown("---")
st.subheader("๐Ÿ“‘ Dataset Overview")
col1, col2, col3 = st.columns(3)
col1.metric("Rows",     f"{total_rows:,}")
col2.metric("Columns",  str(num_cols))
col3.metric("Missing %", f"{missing_pct:.1f}%")

with st.expander("๐Ÿ”Ž Detailed descriptive statistics"):
    stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
    st.dataframe(
        stats_df.style.format(precision=2).background_gradient(cmap="Blues"),
        use_container_width=True,
    )

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 8.  OPTIONAL EXPLORATORY VISUALS
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.markdown("---")
st.subheader("๐Ÿ” Optional Exploratory Visuals")

if st.checkbox("Histogram"):
    hist_col = st.selectbox("Variable", numeric_cols, key="hist")
    st.plotly_chart(histogram_tool(csv_path, hist_col), use_container_width=True)

if st.checkbox("Scatter-matrix"):
    sel = st.multiselect("Choose columns", numeric_cols, default=numeric_cols[:3])
    if sel:
        st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)

if st.checkbox("Correlation heat-map"):
    st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)