File size: 8,624 Bytes
d037161
e490e03
309eec4
 
c7c64f3
f0be302
773f0cf
2c362d2
309eec4
10c7dea
3acbc9c
309eec4
10c7dea
773f0cf
c7c64f3
 
773f0cf
10c7dea
e490e03
d037161
e490e03
10c7dea
2c362d2
b3a1f0c
f0be302
2c362d2
67e3963
e490e03
c7c64f3
e490e03
773f0cf
c7c64f3
67e3963
72a73ff
 
e490e03
d037161
e490e03
f0be302
773f0cf
f0be302
 
 
 
 
c7c64f3
f0be302
 
 
c7c64f3
773f0cf
d037161
f0be302
 
d037161
f0be302
 
d037161
f0be302
d037161
f0be302
 
 
d037161
f0be302
d037161
773f0cf
c7c64f3
 
773f0cf
c7c64f3
 
 
 
f0be302
d037161
773f0cf
 
 
 
 
309eec4
 
d037161
773f0cf
f0be302
10c7dea
773f0cf
d037161
773f0cf
d037161
 
 
10c7dea
e490e03
f0be302
e490e03
f0be302
2c362d2
10c7dea
d037161
3acbc9c
e490e03
3acbc9c
773f0cf
e490e03
10c7dea
d037161
c7c64f3
 
e490e03
c7c64f3
f0be302
773f0cf
e490e03
c7c64f3
e490e03
523228c
 
c7c64f3
 
e490e03
773f0cf
f0be302
e490e03
 
523228c
 
 
773f0cf
3acbc9c
d826a13
c7c64f3
 
 
f0be302
c7c64f3
d037161
c7c64f3
d037161
c7c64f3
 
f0be302
 
d037161
 
 
 
523228c
c7c64f3
 
 
 
 
 
773f0cf
e490e03
c7c64f3
e490e03
523228c
3acbc9c
 
d037161
523228c
773f0cf
d037161
 
d826a13
773f0cf
d037161
 
 
3acbc9c
773f0cf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# app.py  โ€”  BizIntelย AIย Ultra  (CSV, Excel, DB; Plotly, Geminiโ€ฏ1.5โ€ฏPro)

import os
import tempfile
from io import StringIO
from typing import Literal

import pandas as pd
import streamlit as st
import google.generativeai as genai
import plotly.graph_objects as go

from tools.csv_parser import parse_csv_tool
from tools.plot_generator import plot_sales_tool
from tools.forecaster import forecast_tool
from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 1.  GEMINI 1.5โ€‘PRO
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
gemini = genai.GenerativeModel(
    "gemini-1.5-pro-latest",
    generation_config={"temperature": 0.7, "top_p": 0.9, "response_mime_type": "text/plain"},
)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 2.  PAGE SETUP
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.set_page_config(page_title="BizIntelย AIย Ultra", layout="wide")
st.title("๐Ÿ“Š BizIntelย AIย Ultraย โ€“ Advanced Analytics + Geminiย 1.5ย Pro")

TEMP_DIR = tempfile.gettempdir()

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 3.  DATA SOURCE  (CSV, Excel, or DB)
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])

csv_path: str | None = None
file_type: Literal["csv", "excel"] | None = None

if source == "Upload CSV / Excel":
    up = st.file_uploader("Upload CSV or Excel (โ‰คโ€ฏ500โ€ฏMB)", type=["csv", "xlsx", "xls"])
    if up:
        suffix = up.name.split(".")[-1].lower()
        temp_path = os.path.join(TEMP_DIR, up.name)
        with open(temp_path, "wb") as f:
            f.write(up.read())

        if suffix == "csv":
            csv_path = temp_path
            file_type = "csv"
        else:  # Excel โ†’ convert sheet0 to CSV
            file_type = "excel"
            try:
                df_excel = pd.read_excel(temp_path, sheet_name=0)  # loads first sheet
                csv_path = os.path.splitext(temp_path)[0] + ".csv"
                df_excel.to_csv(csv_path, index=False)
            except Exception as e:
                st.error(f"Excel parsing failed: {e}")
                st.stop()
        st.success(f"{up.name} saved โœ…")

else:  # SQL DB
    engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
    conn = st.text_input("SQLAlchemy connection string")
    if conn:
        try:
            tbls = list_tables(conn)
            tbl = st.selectbox("Table", tbls)
            if st.button("Fetch table"):
                csv_path = fetch_data_from_db(conn, tbl)
                file_type = "csv"
                st.success(f"Fetched **{tbl}** as CSV โœ…")
        except Exception as e:
            st.error(f"Connection failed: {e}")
            st.stop()

if csv_path is None:
    st.stop()

# Download working CSV
with open(csv_path, "rb") as f:
    st.download_button("โฌ‡๏ธย Download working CSV", f, file_name=os.path.basename(csv_path))

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 4.  PREVIEW & DATE COL
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
df_preview = pd.read_csv(csv_path, nrows=5)
st.dataframe(df_preview)
date_col = st.selectbox("Select date/time column for forecasting", df_preview.columns)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 5.  LOCAL TOOLS
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
with st.spinner("Parsing datasetโ€ฆ"):
    summary_text = parse_csv_tool(csv_path)

with st.spinner("๐Ÿ“ˆ Generating sales trendโ€ฆ"):
    sales_fig = plot_sales_tool(csv_path, date_col=date_col)
if isinstance(sales_fig, go.Figure):
    st.plotly_chart(sales_fig, use_container_width=True)
else:
    st.warning(sales_fig)

with st.spinner("๐Ÿ”ฎ Forecastingโ€ฆ"):
    forecast_text = forecast_tool(csv_path, date_col=date_col)
    forecast_png = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None

if forecast_png:
    st.image(forecast_png, caption="Sales Forecast", use_column_width=True)

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 6.  GEMINI STRATEGY
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
prompt = (
    f"You are **BizIntel Strategist AI**.\n\n"
    f"### CSV Summary\n```\n{summary_text}\n```\n\n"
    f"### Forecast Output\n```\n{forecast_text}\n```\n\n"
    "Return **Markdown** with:\n"
    "1. Five key insights\n"
    "2. Three actionable strategies\n"
    "3. Risk factors or anomalies\n"
    "4. Suggested additional visuals\n"
)

st.subheader("๐Ÿš€ Strategy Recommendations (Geminiย 1.5ย Pro)")
with st.spinner("Generating insightsโ€ฆ"):
    strategy_md = gemini.generate_content(prompt).text
st.markdown(strategy_md)
st.download_button("โฌ‡๏ธย Download Strategy (.md)", strategy_md, file_name="strategy.md")

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 7.  KPI CARDS + EXPANDER
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
full_df = pd.read_csv(csv_path, low_memory=False)
total_rows = len(full_df)
num_cols   = len(full_df.columns)
missing_pct = full_df.isna().mean().mean() * 100

st.markdown("---")
st.subheader("๐Ÿ“‘ Dataset Overview")
c1, c2, c3 = st.columns(3)
c1.metric("Rows",     f"{total_rows:,}")
c2.metric("Columns",  str(num_cols))
c3.metric("Missingย %", f"{missing_pct:.1f}%")

with st.expander("๐Ÿ”Žย Detailed descriptive statistics"):
    stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
    st.dataframe(
        stats_df.style.format(precision=2).background_gradient(cmap="Blues"),
        use_container_width=True,
    )

# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# 8.  OPTIONAL EXPLORATORY VISUALS
# โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
st.markdown("---")
st.subheader("๐Ÿ” Optional Exploratory Visuals")

num_cols = df_preview.select_dtypes("number").columns

if st.checkbox("Histogram"):
    hcol = st.selectbox("Variable", num_cols, key="hist")
    st.plotly_chart(histogram_tool(csv_path, hcol), use_container_width=True)

if st.checkbox("Scatterโ€‘matrix"):
    sel = st.multiselect("Choose columns", num_cols, default=num_cols[:3])
    if sel:
        st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)

if st.checkbox("Correlation heatโ€‘map"):
    st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)