mgbam committed on
Commit
f0be302
·
verified ·
1 Parent(s): f77922b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -41
app.py CHANGED
@@ -1,8 +1,9 @@
1
- # app.py — BizIntel AI Ultra (Gemini 1.5 Pro, CSV + DB, interactive Plotly, pro summary)
2
 
3
  import os
4
  import tempfile
5
  from io import StringIO
 
6
 
7
  import pandas as pd
8
  import streamlit as st
@@ -21,11 +22,7 @@ from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
21
  genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
22
  gemini = genai.GenerativeModel(
23
  "gemini-1.5-pro-latest",
24
- generation_config={
25
- "temperature": 0.7,
26
- "top_p": 0.9,
27
- "response_mime_type": "text/plain",
28
- },
29
  )
30
 
31
  # ──────────────────────────────────────────────────────────────
@@ -37,20 +34,40 @@ st.title("πŸ“Š BizIntelΒ AIΒ Ultra – Advanced Analytics + GeminiΒ 1.5Β Pro")
37
  TEMP_DIR = tempfile.gettempdir()
38
 
39
  # ──────────────────────────────────────────────────────────────
40
- # 3. DATA SOURCE (CSV OR DB)
41
  # ──────────────────────────────────────────────────────────────
42
- source = st.radio("Select data source", ["Upload CSV", "Connect to SQL Database"])
43
- csv_path = None
44
 
45
- if source == "Upload CSV":
46
- up = st.file_uploader("Upload CSV (≀ 200β€―MB)", type=["csv"])
 
 
 
47
  if up:
48
- csv_path = os.path.join(TEMP_DIR, up.name)
49
- with open(csv_path, "wb") as f:
 
 
50
  f.write(up.read())
51
- st.success("CSV saved βœ…")
52
 
53
- else:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
55
  conn = st.text_input("SQLAlchemy connection string")
56
  if conn:
@@ -59,7 +76,8 @@ else:
59
  tbl = st.selectbox("Table", tbls)
60
  if st.button("Fetch table"):
61
  csv_path = fetch_data_from_db(conn, tbl)
62
- st.success(f"Fetched **{tbl}** as CSV βœ…")
 
63
  except Exception as e:
64
  st.error(f"Connection failed: {e}")
65
  st.stop()
@@ -67,23 +85,24 @@ else:
67
  if csv_path is None:
68
  st.stop()
69
 
70
- # Download original CSV
71
  with open(csv_path, "rb") as f:
72
- st.download_button("⬇️ Download original CSV", f, file_name=os.path.basename(csv_path))
73
 
74
  # ──────────────────────────────────────────────────────────────
75
  # 4. PREVIEW & DATE COLUMN
76
  # ──────────────────────────────────────────────────────────────
77
- df_preview = pd.read_csv(csv_path, nrows=5)
78
- st.dataframe(df_preview)
79
- date_col = st.selectbox("Select date/time column for forecasting", df_preview.columns)
80
 
81
  # ──────────────────────────────────────────────────────────────
82
- # 5. LOCAL TOOLS: SUMMARY, SALES TREND, FORECAST
83
  # ──────────────────────────────────────────────────────────────
84
- with st.spinner("Parsing CSV…"):
85
  summary_text = parse_csv_tool(csv_path)
86
 
 
87
  with st.spinner("Generating sales trend…"):
88
  sales_fig = plot_sales_tool(csv_path, date_col=date_col)
89
  if isinstance(sales_fig, go.Figure):
@@ -91,12 +110,13 @@ if isinstance(sales_fig, go.Figure):
91
  else:
92
  st.warning(sales_fig)
93
 
 
94
  with st.spinner("Forecasting…"):
95
  forecast_text = forecast_tool(csv_path, date_col=date_col)
96
  forecast_png = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None
97
 
98
  if forecast_png:
99
- st.image(forecast_png, caption="Sales Forecast", use_container_width=True)
100
 
101
  # ──────────────────────────────────────────────────────────────
102
  # 6. GEMINI STRATEGY
@@ -107,7 +127,7 @@ prompt = (
107
  f"### Forecast Output\n```\n{forecast_text}\n```\n\n"
108
  "Return **Markdown** with:\n"
109
  "1. Five key insights\n"
110
- "2. Three actionable strategies (with expected impact)\n"
111
  "3. Risk factors or anomalies\n"
112
  "4. Suggested additional visuals\n"
113
  )
@@ -119,20 +139,20 @@ st.markdown(strategy_md)
119
  st.download_button("⬇️ Download Strategy (.md)", strategy_md, file_name="strategy.md")
120
 
121
  # ──────────────────────────────────────────────────────────────
122
- # 7. PROFESSIONAL CSV SUMMARY
123
  # ──────────────────────────────────────────────────────────────
124
- st.markdown("---")
125
- st.subheader("πŸ“‘ CSV Overview")
126
-
127
- full_df = pd.read_csv(csv_path)
128
  total_rows = len(full_df)
129
- num_cols = len(full_df.columns)
130
  missing_pct = full_df.isna().mean().mean() * 100
131
 
132
- c1, c2, c3 = st.columns(3)
133
- c1.metric("Rows", f"{total_rows:,}")
134
- c2.metric("Columns", str(num_cols))
135
- c3.metric("MissingΒ %", f"{missing_pct:.1f}%")
 
 
 
136
 
137
  with st.expander("πŸ”ŽΒ Detailed descriptive statistics"):
138
  stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
@@ -147,16 +167,16 @@ with st.expander("πŸ”ŽΒ Detailed descriptive statistics"):
147
  st.markdown("---")
148
  st.subheader("πŸ” Optional Exploratory Visuals")
149
 
150
- num_cols_only = df_preview.select_dtypes("number").columns
151
 
152
  if st.checkbox("Histogram"):
153
- hcol = st.selectbox("Variable", num_cols_only, key="hist")
154
- st.plotly_chart(histogram_tool(csv_path, hcol), use_container_width=True)
155
 
156
  if st.checkbox("Scatter‑matrix"):
157
- sm_cols = st.multiselect("Choose up to 5 columns", num_cols_only, default=num_cols_only[:3])
158
- if sm_cols:
159
- st.plotly_chart(scatter_matrix_tool(csv_path, sm_cols), use_container_width=True)
160
 
161
  if st.checkbox("Correlation heat‑map"):
162
  st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)
 
1
+ # app.py — BizIntel AI Ultra (CSV, Excel, DB; large-file safe)
2
 
3
  import os
4
  import tempfile
5
  from io import StringIO
6
+ from typing import Literal
7
 
8
  import pandas as pd
9
  import streamlit as st
 
22
  genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
23
  gemini = genai.GenerativeModel(
24
  "gemini-1.5-pro-latest",
25
+ generation_config={"temperature": 0.7, "top_p": 0.9, "response_mime_type": "text/plain"},
 
 
 
 
26
  )
27
 
28
  # ──────────────────────────────────────────────────────────────
 
34
  TEMP_DIR = tempfile.gettempdir()
35
 
36
  # ──────────────────────────────────────────────────────────────
37
+ # 3. DATA SOURCE (CSV, Excel, or DB)
38
  # ──────────────────────────────────────────────────────────────
39
+ source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
 
40
 
41
+ csv_path: str | None = None
42
+ file_type: Literal["csv", "excel"] | None = None
43
+
44
+ if source == "Upload CSV / Excel":
45
+ up = st.file_uploader("Upload CSV or Excel (≀ 500β€―MB)", type=["csv", "xlsx", "xls"])
46
  if up:
47
+ suffix = up.name.split(".")[-1].lower()
48
+ temp_path = os.path.join(TEMP_DIR, up.name)
49
+
50
+ with open(temp_path, "wb") as f:
51
  f.write(up.read())
 
52
 
53
+ if suffix in {"csv"}:
54
+ csv_path = temp_path
55
+ file_type = "csv"
56
+ else: # Excel β†’ convert to CSV for downstream tools
57
+ file_type = "excel"
58
+ try:
59
+ df_iter = pd.read_excel(temp_path, sheet_name=0, chunksize=100000) # chunk read
60
+ csv_path = os.path.splitext(temp_path)[0] + ".csv"
61
+ with open(csv_path, "w", newline="", encoding="utf-8") as csv_out:
62
+ for i, chunk in enumerate(df_iter):
63
+ header = i == 0
64
+ chunk.to_csv(csv_out, index=False, header=header, mode="a")
65
+ except Exception as e:
66
+ st.error(f"Excel parsing failed: {e}")
67
+ st.stop()
68
+ st.success(f"{up.name} savedΒ βœ…")
69
+
70
+ else: # SQL DB path
71
  engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
72
  conn = st.text_input("SQLAlchemy connection string")
73
  if conn:
 
76
  tbl = st.selectbox("Table", tbls)
77
  if st.button("Fetch table"):
78
  csv_path = fetch_data_from_db(conn, tbl)
79
+ file_type = "csv"
80
+ st.success(f"Fetched **{tbl}** as CSVΒ βœ…")
81
  except Exception as e:
82
  st.error(f"Connection failed: {e}")
83
  st.stop()
 
85
  if csv_path is None:
86
  st.stop()
87
 
88
+ # Offer original download
89
  with open(csv_path, "rb") as f:
90
+ st.download_button("⬇️ Download working CSV", f, file_name=os.path.basename(csv_path))
91
 
92
  # ──────────────────────────────────────────────────────────────
93
  # 4. PREVIEW & DATE COLUMN
94
  # ──────────────────────────────────────────────────────────────
95
+ preview_df = pd.read_csv(csv_path, nrows=5)
96
+ st.dataframe(preview_df)
97
+ date_col = st.selectbox("Select date/time column for forecasting", preview_df.columns)
98
 
99
  # ──────────────────────────────────────────────────────────────
100
+ # 5. LOCAL TOOLS
101
  # ──────────────────────────────────────────────────────────────
102
+ with st.spinner("Parsing dataset…"):
103
  summary_text = parse_csv_tool(csv_path)
104
 
105
+ # Trend chart
106
  with st.spinner("Generating sales trend…"):
107
  sales_fig = plot_sales_tool(csv_path, date_col=date_col)
108
  if isinstance(sales_fig, go.Figure):
 
110
  else:
111
  st.warning(sales_fig)
112
 
113
+ # Forecast
114
  with st.spinner("Forecasting…"):
115
  forecast_text = forecast_tool(csv_path, date_col=date_col)
116
  forecast_png = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None
117
 
118
  if forecast_png:
119
+ st.image(forecast_png, caption="Sales Forecast", use_column_width=True)
120
 
121
  # ──────────────────────────────────────────────────────────────
122
  # 6. GEMINI STRATEGY
 
127
  f"### Forecast Output\n```\n{forecast_text}\n```\n\n"
128
  "Return **Markdown** with:\n"
129
  "1. Five key insights\n"
130
+ "2. Three actionable strategies\n"
131
  "3. Risk factors or anomalies\n"
132
  "4. Suggested additional visuals\n"
133
  )
 
139
  st.download_button("⬇️ Download Strategy (.md)", strategy_md, file_name="strategy.md")
140
 
141
  # ──────────────────────────────────────────────────────────────
142
+ # 7. KPI CARDS + EXPANDER
143
  # ──────────────────────────────────────────────────────────────
144
+ full_df = pd.read_csv(csv_path, nrows=None, low_memory=False)
 
 
 
145
  total_rows = len(full_df)
146
+ num_cols = len(full_df.columns)
147
  missing_pct = full_df.isna().mean().mean() * 100
148
 
149
+ st.markdown("---")
150
+ st.subheader("πŸ“‘ Dataset Overview")
151
+
152
+ k1, k2, k3 = st.columns(3)
153
+ k1.metric("Rows", f"{total_rows:,}")
154
+ k2.metric("Columns", str(num_cols))
155
+ k3.metric("MissingΒ %", f"{missing_pct:.1f}%")
156
 
157
  with st.expander("πŸ”ŽΒ Detailed descriptive statistics"):
158
  stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
 
167
  st.markdown("---")
168
  st.subheader("πŸ” Optional Exploratory Visuals")
169
 
170
+ numeric_cols = preview_df.select_dtypes("number").columns
171
 
172
  if st.checkbox("Histogram"):
173
+ hist_col = st.selectbox("Variable", numeric_cols, key="hist")
174
+ st.plotly_chart(histogram_tool(csv_path, hist_col), use_container_width=True)
175
 
176
  if st.checkbox("Scatter‑matrix"):
177
+ sel_cols = st.multiselect("Choose columns", numeric_cols, default=numeric_cols[:3])
178
+ if sel_cols:
179
+ st.plotly_chart(scatter_matrix_tool(csv_path, sel_cols), use_container_width=True)
180
 
181
  if st.checkbox("Correlation heat‑map"):
182
  st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)