Spaces:

mgbam
/

BizIntel_AI

Sleeping

App Files Files Community

mgbam committed on May 6

Commit

f0be302

verified ·

1 Parent(s): f77922b

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -41

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
-# app.py  —  BizIntel AI Ultra  (Gemini 1.5 Pro, CSV + DB, interactive Plotly, pro summary)
 import os
 import tempfile
 from io import StringIO
 import pandas as pd
 import streamlit as st
@@ -21,11 +22,7 @@ from db_connector import fetch_data_from_db, list_tables, SUPPORTED_ENGINES
 genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
 gemini = genai.GenerativeModel(
     "gemini-1.5-pro-latest",
-    generation_config={
-        "temperature": 0.7,
-        "top_p": 0.9,
-        "response_mime_type": "text/plain",
-    },
 )
 # ──────────────────────────────────────────────────────────────
@@ -37,20 +34,40 @@ st.title("📊 BizIntel AI Ultra – Advanced Analytics + Gemini 1.5 Pro")
 TEMP_DIR = tempfile.gettempdir()
 # ──────────────────────────────────────────────────────────────
-# 3.  DATA SOURCE (CSV OR DB)
 # ──────────────────────────────────────────────────────────────
-source = st.radio("Select data source", ["Upload CSV", "Connect to SQL Database"])
-csv_path = None
-if source == "Upload CSV":
-    up = st.file_uploader("Upload CSV (≤ 200 MB)", type=["csv"])
     if up:
-        csv_path = os.path.join(TEMP_DIR, up.name)
-        with open(csv_path, "wb") as f:
             f.write(up.read())
-        st.success("CSV saved ✅")
-else:
     engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
     conn = st.text_input("SQLAlchemy connection string")
     if conn:
@@ -59,7 +76,8 @@ else:
             tbl = st.selectbox("Table", tbls)
             if st.button("Fetch table"):
                 csv_path = fetch_data_from_db(conn, tbl)
-                st.success(f"Fetched **{tbl}** as CSV ✅")
         except Exception as e:
             st.error(f"Connection failed: {e}")
             st.stop()
@@ -67,23 +85,24 @@ else:
 if csv_path is None:
     st.stop()
-# Download original CSV
 with open(csv_path, "rb") as f:
-    st.download_button("⬇️ Download original CSV", f, file_name=os.path.basename(csv_path))
 # ──────────────────────────────────────────────────────────────
 # 4.  PREVIEW & DATE COLUMN
 # ──────────────────────────────────────────────────────────────
-df_preview = pd.read_csv(csv_path, nrows=5)
-st.dataframe(df_preview)
-date_col = st.selectbox("Select date/time column for forecasting", df_preview.columns)
 # ──────────────────────────────────────────────────────────────
-# 5.  LOCAL TOOLS: SUMMARY, SALES TREND, FORECAST
 # ──────────────────────────────────────────────────────────────
-with st.spinner("Parsing CSV…"):
     summary_text = parse_csv_tool(csv_path)
 with st.spinner("Generating sales trend…"):
     sales_fig = plot_sales_tool(csv_path, date_col=date_col)
 if isinstance(sales_fig, go.Figure):
@@ -91,12 +110,13 @@ if isinstance(sales_fig, go.Figure):
 else:
     st.warning(sales_fig)
 with st.spinner("Forecasting…"):
     forecast_text = forecast_tool(csv_path, date_col=date_col)
     forecast_png = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None
 if forecast_png:
-    st.image(forecast_png, caption="Sales Forecast", use_container_width=True)
 # ──────────────────────────────────────────────────────────────
 # 6.  GEMINI STRATEGY
@@ -107,7 +127,7 @@ prompt = (
     f"### Forecast Output\n```\n{forecast_text}\n```\n\n"
     "Return **Markdown** with:\n"
     "1. Five key insights\n"
-    "2. Three actionable strategies (with expected impact)\n"
     "3. Risk factors or anomalies\n"
     "4. Suggested additional visuals\n"
 )
@@ -119,20 +139,20 @@ st.markdown(strategy_md)
 st.download_button("⬇️ Download Strategy (.md)", strategy_md, file_name="strategy.md")
 # ──────────────────────────────────────────────────────────────
-# 7.  PROFESSIONAL CSV SUMMARY
 # ──────────────────────────────────────────────────────────────
-st.markdown("---")
-st.subheader("📑 CSV Overview")
-full_df = pd.read_csv(csv_path)
 total_rows = len(full_df)
-num_cols   = len(full_df.columns)
 missing_pct = full_df.isna().mean().mean() * 100
-c1, c2, c3 = st.columns(3)
-c1.metric("Rows",     f"{total_rows:,}")
-c2.metric("Columns",  str(num_cols))
-c3.metric("Missing %", f"{missing_pct:.1f}%")
 with st.expander("🔎 Detailed descriptive statistics"):
     stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
@@ -147,16 +167,16 @@ with st.expander("🔎 Detailed descriptive statistics"):
 st.markdown("---")
 st.subheader("🔍 Optional Exploratory Visuals")
-num_cols_only = df_preview.select_dtypes("number").columns
 if st.checkbox("Histogram"):
-    hcol = st.selectbox("Variable", num_cols_only, key="hist")
-    st.plotly_chart(histogram_tool(csv_path, hcol), use_container_width=True)
 if st.checkbox("Scatter‑matrix"):
-    sm_cols = st.multiselect("Choose up to 5 columns", num_cols_only, default=num_cols_only[:3])
-    if sm_cols:
-        st.plotly_chart(scatter_matrix_tool(csv_path, sm_cols), use_container_width=True)
 if st.checkbox("Correlation heat‑map"):
     st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)

+# app.py  —  BizIntel AI Ultra (CSV, Excel, DB; large‑file safe)
 import os
 import tempfile
 from io import StringIO
+from typing import Literal
 import pandas as pd
 import streamlit as st
 genai.configure(api_key=os.getenv("GEMINI_APIKEY"))
 gemini = genai.GenerativeModel(
     "gemini-1.5-pro-latest",
+    generation_config={"temperature": 0.7, "top_p": 0.9, "response_mime_type": "text/plain"},
 )
 # ──────────────────────────────────────────────────────────────
 TEMP_DIR = tempfile.gettempdir()
 # ──────────────────────────────────────────────────────────────
+# 3.  DATA SOURCE (CSV, Excel, or DB)
 # ──────────────────────────────────────────────────────────────
+source = st.radio("Select data source", ["Upload CSV / Excel", "Connect to SQL Database"])
+csv_path: str | None = None
+file_type: Literal["csv", "excel"] | None = None
+if source == "Upload CSV / Excel":
+    up = st.file_uploader("Upload CSV or Excel (≤ 500 MB)", type=["csv", "xlsx", "xls"])
     if up:
+        suffix = up.name.split(".")[-1].lower()
+        temp_path = os.path.join(TEMP_DIR, up.name)
+        with open(temp_path, "wb") as f:
             f.write(up.read())
+        if suffix in {"csv"}:
+            csv_path = temp_path
+            file_type = "csv"
+        else:  # Excel → convert to CSV for downstream tools
+            file_type = "excel"
+            try:
+                df_iter = pd.read_excel(temp_path, sheet_name=0, chunksize=100000)  # chunk read
+                csv_path = os.path.splitext(temp_path)[0] + ".csv"
+                with open(csv_path, "w", newline="", encoding="utf-8") as csv_out:
+                    for i, chunk in enumerate(df_iter):
+                        header = i == 0
+                        chunk.to_csv(csv_out, index=False, header=header, mode="a")
+            except Exception as e:
+                st.error(f"Excel parsing failed: {e}")
+                st.stop()
+        st.success(f"{up.name} saved ✅")
+else:  # SQL DB path
     engine = st.selectbox("DB engine", SUPPORTED_ENGINES)
     conn = st.text_input("SQLAlchemy connection string")
     if conn:
             tbl = st.selectbox("Table", tbls)
             if st.button("Fetch table"):
                 csv_path = fetch_data_from_db(conn, tbl)
+                file_type = "csv"
+                st.success(f"Fetched **{tbl}** as CSV ✅")
         except Exception as e:
             st.error(f"Connection failed: {e}")
             st.stop()
 if csv_path is None:
     st.stop()
+# Offer the working CSV (converted from Excel when applicable) for download
 with open(csv_path, "rb") as f:
+    st.download_button("⬇️ Download working CSV", f, file_name=os.path.basename(csv_path))
 # ──────────────────────────────────────────────────────────────
 # 4.  PREVIEW & DATE COLUMN
 # ──────────────────────────────────────────────────────────────
+preview_df = pd.read_csv(csv_path, nrows=5)
+st.dataframe(preview_df)
+date_col = st.selectbox("Select date/time column for forecasting", preview_df.columns)
 # ──────────────────────────────────────────────────────────────
+# 5.  LOCAL TOOLS
 # ──────────────────────────────────────────────────────────────
+with st.spinner("Parsing dataset…"):
     summary_text = parse_csv_tool(csv_path)
+# Trend chart
 with st.spinner("Generating sales trend…"):
     sales_fig = plot_sales_tool(csv_path, date_col=date_col)
 if isinstance(sales_fig, go.Figure):
 else:
     st.warning(sales_fig)
+# Forecast
 with st.spinner("Forecasting…"):
     forecast_text = forecast_tool(csv_path, date_col=date_col)
     forecast_png = "forecast_plot.png" if os.path.exists("forecast_plot.png") else None
 if forecast_png:
+    st.image(forecast_png, caption="Sales Forecast", use_column_width=True)
 # ──────────────────────────────────────────────────────────────
 # 6.  GEMINI STRATEGY
     f"### Forecast Output\n```\n{forecast_text}\n```\n\n"
     "Return **Markdown** with:\n"
     "1. Five key insights\n"
+    "2. Three actionable strategies\n"
     "3. Risk factors or anomalies\n"
     "4. Suggested additional visuals\n"
 )
 st.download_button("⬇️ Download Strategy (.md)", strategy_md, file_name="strategy.md")
 # ──────────────────────────────────────────────────────────────
+# 7.  KPI CARDS + EXPANDER
 # ──────────────────────────────────────────────────────────────
+full_df = pd.read_csv(csv_path, nrows=None, low_memory=False)
 total_rows = len(full_df)
+num_cols = len(full_df.columns)
 missing_pct = full_df.isna().mean().mean() * 100
+st.markdown("---")
+st.subheader("📑 Dataset Overview")
+k1, k2, k3 = st.columns(3)
+k1.metric("Rows",     f"{total_rows:,}")
+k2.metric("Columns",  str(num_cols))
+k3.metric("Missing %", f"{missing_pct:.1f}%")
 with st.expander("🔎 Detailed descriptive statistics"):
     stats_df = full_df.describe().T.reset_index().rename(columns={"index": "Feature"})
 st.markdown("---")
 st.subheader("🔍 Optional Exploratory Visuals")
+numeric_cols = preview_df.select_dtypes("number").columns
 if st.checkbox("Histogram"):
+    hist_col = st.selectbox("Variable", numeric_cols, key="hist")
+    st.plotly_chart(histogram_tool(csv_path, hist_col), use_container_width=True)
 if st.checkbox("Scatter‑matrix"):
+    sel_cols = st.multiselect("Choose columns", numeric_cols, default=numeric_cols[:3])
+    if sel_cols:
+        st.plotly_chart(scatter_matrix_tool(csv_path, sel_cols), use_container_width=True)
 if st.checkbox("Correlation heat‑map"):
     st.plotly_chart(corr_heatmap_tool(csv_path), use_container_width=True)