mgbam committed on
Commit
d037161
·
verified ·
1 Parent(s): f0be302

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -36
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — BizIntel AI Ultra (CSV, Excel, DB; large‑file safe)
2
 
3
  import os
4
  import tempfile
@@ -17,7 +17,7 @@ from tools.visuals import histogram_tool, scatter_matrix_tool, corr_heatmap_tool
17
  from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
18
 
19
  # ──────────────────────────────────────────────────────────────
20
- # 1. GEMINI CONFIG
21
  # ──────────────────────────────────────────────────────────────
22
  genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
23
  gemini = genai.GenerativeModel(
@@ -34,7 +34,7 @@ st.title("๐Ÿ“Š BizIntelย AIย Ultraย โ€“ Advanced Analytics + Geminiย 1.5ย Pro")
34
  TEMP_DIR = tempfile.gettempdir()
35
 
36
  # ──────────────────────────────────────────────────────────────
37
- # 3. DATA SOURCE (CSV, Excel, or DB)
38
  # ──────────────────────────────────────────────────────────────
39
  source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
40
 
@@ -46,28 +46,24 @@ if source == "Upload CSV / Excel":
46
  if up:
47
  suffix = up.name.split(".")[-1].lower()
48
  temp_path = os.path.join(TEMP_DIR, up.name)
49
-
50
  with open(temp_path, "wb") as f:
51
  f.write(up.read())
52
 
53
- if suffix in {"csv"}:
54
  csv_path = temp_path
55
  file_type = "csv"
56
- else: # Excel → convert to CSV for downstream tools
57
  file_type = "excel"
58
  try:
59
- df_iter = pd.read_excel(temp_path, sheet_name=0, chunksize=100000) # chunk read
60
  csv_path = os.path.splitext(temp_path)[0] + ".csv"
61
- with open(csv_path, "w", newline="", encoding="utf-8") as csv_out:
62
- for i, chunk in enumerate(df_iter):
63
- header = i == 0
64
- chunk.to_csv(csv_out, index=False, header=header, mode="a")
65
  except Exception as e:
66
  st.error(f"Excel parsing failed: {e}")
67
  st.stop()
68
- st.success(f"{up.name} savedย โœ…")
69
 
70
- else: # SQL DB path
71
  engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
72
  conn = st.text_input("SQLAlchemy connection string")
73
  if conn:
@@ -77,7 +73,7 @@ else: # SQL DB path
77
  if st.button("Fetch table"):
78
  csv_path = fetch_data_from_db(conn, tbl)
79
  file_type = "csv"
80
- st.success(f"Fetched **{tbl}** as CSVย โœ…")
81
  except Exception as e:
82
  st.error(f"Connection failed: {e}")
83
  st.stop()
@@ -85,16 +81,16 @@ else: # SQL DB path
85
  if csv_path is None:
86
  st.stop()
87
 
88
- # Offer original download
89
  with open(csv_path, "rb") as f:
90
  st.download_button("โฌ‡๏ธย Download working CSV", f, file_name=os.path.basename(csv_path))
91
 
92
  # ──────────────────────────────────────────────────────────────
93
- # 4. PREVIEW & DATE COLUMN
94
  # ──────────────────────────────────────────────────────────────
95
- preview_df = pd.read_csv(csv_path, nrows=5)
96
- st.dataframe(preview_df)
97
- date_col = st.selectbox("Select date/time column for forecasting", preview_df.columns)
98
 
99
  # ──────────────────────────────────────────────────────────────
100
  # 5. LOCAL TOOLS
@@ -102,16 +98,14 @@ date_col = st.selectbox("Select date/time column for forecasting", preview_df.co
102
  with st.spinner("Parsing datasetโ€ฆ"):
103
  summary_text = parse_csv_tool(csv_path)
104
 
105
- # Trend chart
106
- with st.spinner("Generating sales trendโ€ฆ"):
107
  sales_fig = plot_sales_tool(csv_path, date_col=date_col)
108
  if isinstance(sales_fig, go.Figure):
109
  st.plotly_chart(sales_fig, use_container_width=True)
110
  else:
111
  st.warning(sales_fig)
112
 
113
- # Forecast
114
- with st.spinner("Forecastingโ€ฆ"):
115
  forecast_text = forecast_tool(csv_path, date_col=date_col)
116
  forecast_png = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None
117
 
@@ -141,18 +135,17 @@ st.download_button("โฌ‡๏ธย Download Strategy (.md)", strategy_md, file_name="st
141
  # ──────────────────────────────────────────────────────────────
142
  # 7. KPI CARDS + EXPANDER
143
  # ──────────────────────────────────────────────────────────────
144
- full_df = pd.read_csv(csv_path, nrows=None, low_memory=False)
145
  total_rows = len(full_df)
146
- num_cols = len(full_df.columns)
147
  missing_pct = full_df.isna().mean().mean() * 100
148
 
149
  st.markdown("---")
150
  st.subheader("๐Ÿ“‘ Dataset Overview")
151
-
152
- k1, k2, k3 = st.columns(3)
153
- k1.metric("Rows", f"{total_rows:,}")
154
- k2.metric("Columns", str(num_cols))
155
- k3.metric("Missingย %", f"{missing_pct:.1f}%")
156
 
157
  with st.expander("๐Ÿ”Žย Detailed descriptive statistics"):
158
  stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
@@ -167,16 +160,16 @@ with st.expander("๐Ÿ”Žย Detailed descriptive statistics"):
167
  st.markdown("---")
168
  st.subheader("๐Ÿ” Optional Exploratory Visuals")
169
 
170
- numeric_cols = preview_df.select_dtypes("number").columns
171
 
172
  if st.checkbox("Histogram"):
173
- hist_col = st.selectbox("Variable", numeric_cols, key="hist")
174
- st.plotly_chart(histogram_tool(csv_path, hist_col), use_container_width=True)
175
 
176
  if st.checkbox("Scatterโ€‘matrix"):
177
- sel_cols = st.multiselect("Choose columns", numeric_cols, default=numeric_cols[:3])
178
- if sel_cols:
179
- st.plotly_chart(scatter_matrix_tool(csv_path, sel_cols), use_container_width=True)
180
 
181
  if st.checkbox("Correlation heatโ€‘map"):
182
  st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)
 
1
+ # app.py — BizIntel AI Ultra (CSV, Excel, DB; Plotly, Gemini 1.5 Pro)
2
 
3
  import os
4
  import tempfile
 
17
  from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
18
 
19
  # ──────────────────────────────────────────────────────────────
20
+ # 1. GEMINI 1.5‑PRO
21
  # ──────────────────────────────────────────────────────────────
22
  genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
23
  gemini = genai.GenerativeModel(
 
34
  TEMP_DIR = tempfile.gettempdir()
35
 
36
  # ──────────────────────────────────────────────────────────────
37
+ # 3. DATA SOURCE (CSV, Excel, or DB)
38
  # ──────────────────────────────────────────────────────────────
39
  source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
40
 
 
46
  if up:
47
  suffix = up.name.split(".")[-1].lower()
48
  temp_path = os.path.join(TEMP_DIR, up.name)
 
49
  with open(temp_path, "wb") as f:
50
  f.write(up.read())
51
 
52
+ if suffix == "csv":
53
  csv_path = temp_path
54
  file_type = "csv"
55
+ else: # Excel → convert sheet0 to CSV
56
  file_type = "excel"
57
  try:
58
+ df_excel = pd.read_excel(temp_path, sheet_name=0) # loads first sheet
59
  csv_path = os.path.splitext(temp_path)[0] + ".csv"
60
+ df_excel.to_csv(csv_path, index=False)
 
 
 
61
  except Exception as e:
62
  st.error(f"Excel parsing failed: {e}")
63
  st.stop()
64
+ st.success(f"{up.name} saved โœ…")
65
 
66
+ else: # SQL DB
67
  engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
68
  conn = st.text_input("SQLAlchemy connection string")
69
  if conn:
 
73
  if st.button("Fetch table"):
74
  csv_path = fetch_data_from_db(conn, tbl)
75
  file_type = "csv"
76
+ st.success(f"Fetched **{tbl}** as CSV โœ…")
77
  except Exception as e:
78
  st.error(f"Connection failed: {e}")
79
  st.stop()
 
81
  if csv_path is None:
82
  st.stop()
83
 
84
+ # Download working CSV
85
  with open(csv_path, "rb") as f:
86
  st.download_button("โฌ‡๏ธย Download working CSV", f, file_name=os.path.basename(csv_path))
87
 
88
  # ──────────────────────────────────────────────────────────────
89
+ # 4. PREVIEW & DATE COL
90
  # ──────────────────────────────────────────────────────────────
91
+ df_preview = pd.read_csv(csv_path, nrows=5)
92
+ st.dataframe(df_preview)
93
+ date_col = st.selectbox("Select date/time column for forecasting", df_preview.columns)
94
 
95
  # ──────────────────────────────────────────────────────────────
96
  # 5. LOCAL TOOLS
 
98
  with st.spinner("Parsing datasetโ€ฆ"):
99
  summary_text = parse_csv_tool(csv_path)
100
 
101
+ with st.spinner("๐Ÿ“ˆ Generating sales trendโ€ฆ"):
 
102
  sales_fig = plot_sales_tool(csv_path, date_col=date_col)
103
  if isinstance(sales_fig, go.Figure):
104
  st.plotly_chart(sales_fig, use_container_width=True)
105
  else:
106
  st.warning(sales_fig)
107
 
108
+ with st.spinner("๐Ÿ”ฎ Forecastingโ€ฆ"):
 
109
  forecast_text = forecast_tool(csv_path, date_col=date_col)
110
  forecast_png = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None
111
 
 
135
  # ──────────────────────────────────────────────────────────────
136
  # 7. KPI CARDS + EXPANDER
137
  # ──────────────────────────────────────────────────────────────
138
+ full_df = pd.read_csv(csv_path, low_memory=False)
139
  total_rows = len(full_df)
140
+ num_cols = len(full_df.columns)
141
  missing_pct = full_df.isna().mean().mean() * 100
142
 
143
  st.markdown("---")
144
  st.subheader("๐Ÿ“‘ Dataset Overview")
145
+ c1, c2, c3 = st.columns(3)
146
+ c1.metric("Rows", f"{total_rows:,}")
147
+ c2.metric("Columns", str(num_cols))
148
+ c3.metric("Missingย %", f"{missing_pct:.1f}%")
 
149
 
150
  with st.expander("๐Ÿ”Žย Detailed descriptive statistics"):
151
  stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
 
160
  st.markdown("---")
161
  st.subheader("๐Ÿ” Optional Exploratory Visuals")
162
 
163
+ num_cols = df_preview.select_dtypes("number").columns
164
 
165
  if st.checkbox("Histogram"):
166
+ hcol = st.selectbox("Variable", num_cols, key="hist")
167
+ st.plotly_chart(histogram_tool(csv_path, hcol), use_container_width=True)
168
 
169
  if st.checkbox("Scatterโ€‘matrix"):
170
+ sel = st.multiselect("Choose columns", num_cols, default=num_cols[:3])
171
+ if sel:
172
+ st.plotly_chart(scatter_matrix_tool(csv_path, sel), use_container_width=True)
173
 
174
  if st.checkbox("Correlation heatโ€‘map"):
175
  st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)