Spaces:

mgbam
/

BizIntel_AI

Sleeping

App Files Community

mgbam committed on May 7

Commit

92cca14

verified ·

1 Parent(s): de6f1e8

Update tools/forecaster.py

Browse files

Files changed (1) hide show

tools/forecaster.py +44 -54

tools/forecaster.py CHANGED Viewed

@@ -1,8 +1,10 @@
 import os
 import tempfile
 import pandas as pd
 from statsmodels.tsa.arima.model import ARIMA
 import plotly.graph_objects as go
 def forecast_metric_tool(
@@ -11,44 +13,38 @@ def forecast_metric_tool(
     value_col: str,
     periods: int = 3,
     output_dir: str = "/tmp"
-):
     """
-    Load a CSV or Excel file, parse a time series metric, fit an ARIMA(1,1,1) model,
-    forecast the next `periods` steps, and save a combined history+forecast plot.
     Returns:
-      forecast_df (pd.DataFrame): next-period predicted values, indexed by date.
-      plot_path (str): full path to the saved PNG plot.
-    Errors return a string starting with '❌' describing the problem.
     """
-    # 0) Load data (CSV or Excel)
     ext = os.path.splitext(file_path)[1].lower()
     try:
-        if ext in ('.xls', '.xlsx'):
-            df = pd.read_excel(file_path)
-        else:
-            df = pd.read_csv(file_path)
-    except Exception as e:
-        return f"❌ Failed to load file: {e}"
-    # 1) Validate columns
-    for col in (date_col, value_col):
-        if col not in df.columns:
-            return f"❌ Column '{col}' not found."
-    # 2) Parse dates and numeric
     try:
-        df[date_col] = pd.to_datetime(df[date_col])
     except Exception:
         return f"❌ Could not parse '{date_col}' as dates."
     df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
     df = df.dropna(subset=[date_col, value_col])
     if df.empty:
-        return f"❌ No valid rows after dropping NaNs in '{date_col}'/'{value_col}'."
-    # 3) Aggregate duplicates & index
     df = (
         df[[date_col, value_col]]
         .groupby(date_col, as_index=True)
@@ -56,56 +52,50 @@ def forecast_metric_tool(
         .sort_index()
     )
-    # 4) Infer frequency
-    freq = pd.infer_freq(df.index)
-    if freq is None:
-        freq = 'D'  # fallback
     try:
         df = df.asfreq(freq)
-    except ValueError as e:
-        # if duplicates remain
         df = df[~df.index.duplicated(keep='first')].asfreq(freq)
-    # 5) Fit ARIMA
     try:
         model = ARIMA(df[value_col], order=(1, 1, 1))
         fit = model.fit()
-    except Exception as e:
-        return f"❌ ARIMA fitting failed: {e}"
-    # 6) Forecast future
-    fc_res = fit.get_forecast(steps=periods)
-    forecast = fc_res.predicted_mean
     forecast_df = forecast.to_frame(name='Forecast')
-    # 7) Plot history + forecast
-    fig = go.Figure()
-    fig.add_trace(
-        go.Scatter(
-            x=df.index, y=df[value_col],
-            mode='lines+markers', name=value_col
-        )
-    )
-    fig.add_trace(
-        go.Scatter(
-            x=forecast.index, y=forecast,
-            mode='lines+markers', name='Forecast'
-        )
     )
     fig.update_layout(
         title=f"{value_col} Forecast",
         xaxis_title=date_col,
         yaxis_title=value_col,
-        template='plotly_dark',
     )
-    # 8) Save to temporary file
     os.makedirs(output_dir, exist_ok=True)
-    tmp = tempfile.NamedTemporaryFile(
-        suffix='.png', prefix='forecast_', dir=output_dir, delete=False
-    )
     plot_path = tmp.name
     tmp.close()
-    fig.write_image(plot_path)
     return forecast_df, plot_path

+# tools/forecaster.py
 import os
 import tempfile
 import pandas as pd
 from statsmodels.tsa.arima.model import ARIMA
 import plotly.graph_objects as go
+from typing import Tuple, Union
 def forecast_metric_tool(
     value_col: str,
     periods: int = 3,
     output_dir: str = "/tmp"
+) -> Union[Tuple[pd.DataFrame, str], str]:
     """
+    Load CSV or Excel, parse a time series metric, fit ARIMA(1,1,1),
+    forecast next `periods` steps, return DataFrame and PNG path.
     Returns:
+      - (forecast_df, plot_path) on success
+      - error string starting with '❌' on failure
     """
+    # Load data
     ext = os.path.splitext(file_path)[1].lower()
     try:
+        df = pd.read_excel(file_path) if ext in ('.xls', '.xlsx') else pd.read_csv(file_path)
+    except Exception as exc:
+        return f"❌ Failed to load file: {exc}"
+    # Validate columns
+    missing = [c for c in (date_col, value_col) if c not in df.columns]
+    if missing:
+        return f"❌ Missing column(s): {', '.join(missing)}"
+    # Parse and clean
     try:
+        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')
     except Exception:
         return f"❌ Could not parse '{date_col}' as dates."
     df[value_col] = pd.to_numeric(df[value_col], errors='coerce')
     df = df.dropna(subset=[date_col, value_col])
     if df.empty:
+        return f"❌ No valid data after cleaning '{date_col}'/'{value_col}'"
+    # Aggregate duplicates and sort
     df = (
         df[[date_col, value_col]]
         .groupby(date_col, as_index=True)
         .sort_index()
     )
+    # Infer frequency
+    freq = pd.infer_freq(df.index) or 'D'
     try:
         df = df.asfreq(freq)
+    except Exception:
         df = df[~df.index.duplicated(keep='first')].asfreq(freq)
+    # Fit ARIMA
     try:
         model = ARIMA(df[value_col], order=(1, 1, 1))
         fit = model.fit()
+    except Exception as exc:
+        return f"❌ ARIMA fitting failed: {exc}"
+    # Forecast
+    try:
+        pred = fit.get_forecast(steps=periods)
+        forecast = pred.predicted_mean
+    except Exception as exc:
+        return f"❌ Forecast generation failed: {exc}"
     forecast_df = forecast.to_frame(name='Forecast')
+    # Plot history + forecast
+    fig = go.Figure(
+        data=[
+            go.Scatter(x=df.index, y=df[value_col], mode='lines', name='History'),
+            go.Scatter(x=forecast.index, y=forecast, mode='lines+markers', name='Forecast')
+        ]
     )
     fig.update_layout(
         title=f"{value_col} Forecast",
         xaxis_title=date_col,
         yaxis_title=value_col,
+        template='plotly_dark'
     )
+    # Save PNG
     os.makedirs(output_dir, exist_ok=True)
+    tmp = tempfile.NamedTemporaryFile(suffix='.png', prefix='forecast_', dir=output_dir, delete=False)
     plot_path = tmp.name
     tmp.close()
+    try:
+        fig.write_image(plot_path, scale=2)
+    except Exception as exc:
+        return f"❌ Plot saving failed: {exc}"
     return forecast_df, plot_path